In [187]:
import pandas as pd
import pickle
from os import listdir
from os.path import join
import plotly.graph_objects as go
import regex as re
import itertools

# Default for read_simulation's `steps_in_hour`: how many consecutive
# simulation rows are labelled with the same hour value.
STEPS_IN_HOUR = 120
# Directory whose entries are scanned for 'datacollector' files and to which
# pickle file names are joined before opening.
INPUT_PATH = 'pickle'

In [188]:
# Keep only the entries of INPUT_PATH whose name begins with 'datacollector'
# (re.match anchors the pattern at the start of the string).
data_collector_files = list(
    filter(lambda entry: re.match('datacollector', entry), listdir(INPUT_PATH))
)
data_collector_files

['datacollector$prob_coda4_jockey2$1630369298.pkl',
 'datacollector$coda3_jockey2$1630369287.pkl',
 'datacollector$validazione_1$1630369269.pkl',
 'datacollector$prob_coda1_jockey1$1630369261.pkl',
 'datacollector$coda2_jockey2$1630369254.pkl',
 'datacollector$validazione_3$1630369291.pkl',
 'datacollector$coda3_jockey1$1630369285.pkl',
 'datacollector$prob_coda2_jockey2$1630369266.pkl',
 'datacollector$prob_coda3_jockey1$1630369303.pkl',
 'datacollector$validazione_4$1630369283.pkl',
 'datacollector$codacondivisa$1630369237.pkl',
 'datacollector$coda4_jockey1$1630369264.pkl',
 'datacollector$prob_coda1_jockey2$1630369294.pkl',
 'datacollector$validazione_2$1630369272.pkl',
 'datacollector$prob_coda4_jockey1$1630369280.pkl',
 'datacollector$prob_coda3_jockey2$1630369245.pkl',
 'datacollector$self_scan$1630369279.pkl',
 'datacollector$coda1_jockey1$1630369264.pkl',
 'datacollector$coda1_jockey2$1630369239.pkl',
 'datacollector$coda4_jockey2$1630369303.pkl',
 'datacollector$coda2_jockey1

In [191]:
def read_pickle_file(filename):
    """Unpickle and return the object stored in ``INPUT_PATH/filename``.

    NOTE: ``pickle.load`` can execute arbitrary code, so only files from a
    trusted source should be placed in ``INPUT_PATH``.
    """
    pickle_path = join(INPUT_PATH, filename)
    with open(pickle_path, 'rb') as handle:
        payload = pickle.load(handle)
    return payload

def read_simulation(filename, df_arrivals, steps_in_hour=STEPS_IN_HOUR):
    """Load one data-collector pickle as a DataFrame tagged with run name and hour.

    Two columns are added to the unpickled data:

    * ``simulation_name`` - the second ``'$'``-separated field of ``filename``,
      repeated on every row.
    * ``hour`` - assigned by row position: each value of
      ``df_arrivals["hour"]`` covers ``steps_in_hour`` consecutive rows, the
      whole sequence is repeated once per complete pass that fits in the
      data, and any leftover rows receive the last hour value.

    Returns the augmented DataFrame.
    """
    simulation_df = pd.DataFrame(read_pickle_file(filename))
    run_label = filename.split('$')[1]
    n_rows = len(simulation_df)

    # One "pass" is every hour of df_arrivals, steps_in_hour rows each.
    rows_per_pass = steps_in_hour * len(df_arrivals)
    n_passes, n_leftover = divmod(n_rows, rows_per_pass)

    simulation_df["simulation_name"] = [run_label] * n_rows

    hour_values = df_arrivals["hour"]
    hour_blocks = [[hour] * steps_in_hour for hour in hour_values] * n_passes
    # Rows beyond the last complete pass are all labelled with the final hour.
    hour_blocks.append([hour_values.iloc[-1]] * n_leftover)
    simulation_df["hour"] = list(itertools.chain.from_iterable(hour_blocks))

    return simulation_df

# Arrivals table; its "hour" column drives the hour labels of every run.
df_arrivals = read_pickle_file('df_arrivals_aggregated.pkl')

# Load each data-collector file as its own frame, then stack them row-wise.
simulation_frames = [
    read_simulation(collector_file, df_arrivals)
    for collector_file in data_collector_files
]
df_simulations = pd.concat(simulation_frames)
df_simulations.head()


Unnamed: 0,Total_customers,Density_total,Flow_total,Density_standard,Flow_standard,Density_self_scan,Flow_self_scan,Total_steps,Avg_waiting_times_standard,Avg_waiting_times_self_scan,Number_exiting_customers,simulation_name,hour
0,1,0.045455,0.045455,0.0,0.0,0.2,0.2,2,0.0,0.0,0,prob_coda4_jockey2,8
1,2,0.090909,0.045455,0.058824,0.058824,0.2,0.0,3,0.0,0.0,0,prob_coda4_jockey2,8
2,3,0.136364,0.045455,0.117647,0.058824,0.2,0.0,4,0.0,0.0,0,prob_coda4_jockey2,8
3,3,0.136364,0.0,0.117647,0.0,0.2,0.0,5,0.0,0.0,0,prob_coda4_jockey2,8
4,4,0.181818,0.045455,0.176471,0.058824,0.2,0.0,6,0.0,0.0,0,prob_coda4_jockey2,8


In [193]:
def aggregate_hours(df):
    """Average every numeric column of ``df`` within each ``hour`` group.

    Non-numeric columns (for example a string ``simulation_name`` column) are
    excluded explicitly with ``numeric_only=True``. Without it, pandas >= 2.0
    raises ``TypeError`` when asked for the mean of an object column, whereas
    older versions silently dropped such columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``"hour"`` column.

    Returns
    -------
    pandas.DataFrame
        One row per distinct hour, with ``hour`` restored as a regular column
        followed by the per-hour means of the numeric columns.
    """
    return df.groupby(by=['hour']).mean(numeric_only=True).reset_index()

def add_simulation_to_plot(fig, df_simulations, simulation_name, feature, normalize=True):
    """Add one simulation's hourly curve for ``feature`` to ``fig``.

    The rows of ``df_simulations`` whose ``simulation_name`` equals the given
    name are passed through ``aggregate_hours``; the resulting ``feature``
    column is plotted against ``hour`` as a 'lines+markers' trace named after
    the simulation. With ``normalize`` set, the values are divided by their
    sum first. The same ``fig`` object is returned.
    """
    selected_rows = df_simulations.query(f'simulation_name == "{simulation_name}"')
    hourly = aggregate_hours(selected_rows)

    feature_values = hourly[feature]
    trace_y = feature_values / sum(feature_values) if normalize else feature_values

    trace = go.Scatter(
        x=hourly["hour"],
        y=trace_y,
        mode='lines+markers',
        name=simulation_name,
    )
    fig.add_trace(trace)
    return fig

def plot_simulation_comparison(df_arrivals, normalize=True):
    """Create the comparison figure and draw the arrivals curve on it.

    Parameters
    ----------
    df_arrivals : pandas.DataFrame
        Must provide an ``"hour"`` column (x axis) and a ``"value"`` column
        (y axis).
    normalize : bool, default True
        When true, the values are divided by their sum, the y-axis tick
        spacing is 0.01 instead of 15, and ``' (Normalized)'`` is appended to
        the figure title and the y-axis title.

    Returns
    -------
    plotly.graph_objects.Figure
        A new figure holding a single 'lines+markers' trace named
        'Ground truth'.
    """
    fig = go.Figure()
    y_values = df_arrivals["value"]
    dtick = 15
    if normalize:
        y_values = y_values / sum(y_values)
        dtick = 0.01
    fig.add_trace(go.Scatter(x=df_arrivals["hour"], y=y_values,
                             mode='lines+markers', name='Ground truth'))

    # Build the suffix once. Writing `'text' + ' (Normalized)' if normalize else ''`
    # inline parses as `('text' + ' (Normalized)') if normalize else ''`, which
    # blanks the whole title when normalize is False.
    suffix = ' (Normalized)' if normalize else ''

    fig.update_layout(
        xaxis=dict(
            tickmode='linear',
            tick0=0,
            dtick=1
        ),
        yaxis=dict(
            tickmode='linear',
            tick0=0,
            dtick=dtick
        ),
        title={
            'text': 'Real data vs simulation' + suffix,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Number of incoming customers" + suffix,
        legend_title="Distribution",
    )
    return fig

In [195]:
# Raw (non-normalized) arrivals curve plus the "Total_customers" curve of each
# validation run listed below.
fig = plot_simulation_comparison(df_arrivals, normalize=False)
for validation_run in ('validazione_1', 'validazione_2'):
    fig = add_simulation_to_plot(fig, df_simulations, validation_run,
                                 "Total_customers", normalize=False)
fig.show()