In [16]:
import pandas as pd
import pickle
from os import listdir
from os.path import join
import plotly.graph_objects as go
import regex as re
import itertools

STEPS_IN_HOUR = 120
INPUT_PATH = 'pickle'

In [14]:
data_collector_files = [f for f in listdir(INPUT_PATH) if re.match('datacollector', f)]
data_collector_files

['datacollector_validazione2_1628161459.pkl',
 'datacollector_validazione1_1628161040.pkl']

In [15]:
def read_pickle_file(filename):
    with open(join(INPUT_PATH, filename), 'rb') as f:
        return pickle.load(f)

def read_simulation(filename, df_arrivals, steps_in_hour=STEPS_IN_HOUR):
    simulation_dict = read_pickle_file(filename)
    simulation_df = pd.DataFrame(simulation_dict)
    # Add simulation name
    simulation_name = filename.split('_')[1]
    full_length = steps_in_hour * len(df_arrivals)
    simulation_df["simulation_name"] = [simulation_name] * (full_length + (len(simulation_df) % full_length))
    # Add hours
    hours = [([hour]*steps_in_hour) for hour in df_arrivals["hour"]]
    hours = hours + [[df_arrivals["hour"].iloc[-1]] * (len(simulation_df) % full_length)]
    simulation_df["hour"] = list(itertools.chain(*hours))

    return simulation_df

df_arrivals = read_pickle_file('df_arrivals_aggregated.pkl')
df_simulations = pd.concat([read_simulation(x, df_arrivals) for x in data_collector_files])
df_simulations.head()


Unnamed: 0,Total_customers,Density_total,Flow_total,Density_standard,Flow_standard,Density_self_scan,Flow_self_scan,Total_steps,Avg_waiting_times_standard,Avg_waiting_times_self_scan,simulation_name,hour
0,1,0.038462,0.038462,0.038462,0.038462,0,0,2,0.0,0,validazione2,8
1,2,0.076923,0.038462,0.076923,0.038462,0,0,3,0.0,0,validazione2,8
2,4,0.153846,0.076923,0.153846,0.076923,0,0,4,0.0,0,validazione2,8
3,5,0.192308,0.038462,0.192308,0.038462,0,0,5,0.0,0,validazione2,8
4,6,0.230769,0.038462,0.230769,0.038462,0,0,6,0.0,0,validazione2,8


In [27]:
target_simulation = df_simulations.query('simulation_name == "validazione1"')
df = target_simulation
df.groupby(by=['hour']).mean().reset_index()

Unnamed: 0,hour,Total_customers,Density_total,Flow_total,Density_standard,Flow_standard,Density_self_scan,Flow_self_scan,Total_steps,Avg_waiting_times_standard,Avg_waiting_times_self_scan
0,8,22.925,0.881731,0.034936,0.881731,0.034936,0.0,0.0,61.5,1.763527,0.0
1,9,49.833333,1.916667,0.048077,1.916667,0.048077,0.0,0.0,181.5,4.092611,0.0
2,10,74.541667,2.866987,0.060897,2.866987,0.060897,0.0,0.0,301.5,8.938857,0.0
3,11,91.075,3.502885,0.072115,3.502885,0.072115,0.0,0.0,421.5,11.9823,0.0
4,12,86.383333,3.322436,0.075,3.322436,0.075,0.0,0.0,541.5,13.439497,0.0
5,13,96.016667,3.692949,0.070513,3.692949,0.070513,0.0,0.0,661.5,13.516572,0.0
6,14,94.075,3.618269,0.066987,3.618269,0.066987,0.0,0.0,781.5,14.372284,0.0
7,15,71.766667,2.760256,0.063462,2.760256,0.063462,0.0,0.0,901.5,15.178848,0.0
8,16,82.533333,3.174359,0.0625,3.174359,0.0625,0.0,0.0,1021.5,14.842577,0.0
9,17,88.141667,3.390064,0.066346,3.390064,0.066346,0.0,0.0,1141.5,15.168307,0.0


In [35]:
aggregate_hours = lambda df: df.groupby(by=['hour']).mean().reset_index()

def add_simulation_to_plot(fig, df_simulations, simulation_name, feature, normalize=True):
    target_simulation = df_simulations.query(f'simulation_name == "{simulation_name}"')
    df = aggregate_hours(target_simulation)
    y_values = df[feature]
    if normalize:
        y_values = y_values / sum(y_values)
    fig.add_trace(go.Scatter(x=df["hour"], y=y_values,
                             mode='lines', name=simulation_name))
    return fig

def plot_simulation_comparison(df_arrivals, normalize=True):

    fig = go.Figure()
    y_values = df_arrivals["value"]
    dtick = 15
    if normalize:
        y_values = y_values / sum(y_values)
        dtick = 0.01
    fig.add_trace(go.Scatter(x=df_arrivals["hour"], y=y_values,
                             mode='lines', name='Ground truth'))

    fig.update_layout(
        xaxis = dict(
            tickmode = 'linear',
            tick0 = 0,
            dtick = 1
        ),
        yaxis = dict(
            tickmode = 'linear',
            tick0 = 0,
            dtick = dtick
        ),
        title={
            'text': 'Real data vs simulation' + ' (Normalized)' if normalize else '',
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hour",
        yaxis_title="Number of incoming customers" + ' (Normalized)' if normalize else '',
        legend_title="Distribution",
    )
    return fig

fig = plot_simulation_comparison(df_arrivals)
fig = add_simulation_to_plot(fig, df_simulations, 'validazione1', "Total_customers")
fig = add_simulation_to_plot(fig, df_simulations, 'validazione2', "Total_customers")
fig.show()



