In [75]:
import pandas as pd
import numpy as np

def process_event_log_dataframe(df):
    """Collapse the split date/time columns of an event log into one ``Date`` column.

    The timestamp is built as January 1st of ``Year`` plus ``Day - 1`` days
    (so ``Day`` is treated as a 1-based day of the year), plus ``Hour``,
    ``Minute`` and ``Second`` offsets.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Year``, ``Day``, ``Hour``, ``Minute``,
        ``Second``, ``Zone``, ``Type`` and ``Info``. The frame is NOT modified;
        a new frame is returned.

    Returns
    -------
    pandas.DataFrame
        The remaining columns plus ``Date``, with the row order reversed and a
        fresh 0-based index.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    # Build the timestamp from temporaries so the caller's frame is left
    # untouched (assigning into df[...] here would mutate the argument and make
    # a second call on the same frame fail).
    timestamp = (
        pd.to_datetime(df['Year'], format='%Y')
        + pd.to_timedelta(df['Day'] - 1, unit='D')
        + pd.to_timedelta(df['Hour'], unit='h')
        + pd.to_timedelta(df['Minute'], unit='m')
        # lowercase 's': the uppercase 'S' alias is deprecated in recent pandas
        + pd.to_timedelta(df['Second'], unit='s')
    )
    result = df.drop(
        columns=['Year', 'Day', 'Hour', 'Minute', 'Second', 'Zone', 'Type', 'Info']
    ).assign(Date=timestamp)
    # reverse, since originally they are later first and earlier last
    return result.iloc[::-1].reset_index(drop=True)


def split_event_dataframe_into_event_types(df):
    """Partition an event frame by the value of its ``TypeText`` column.

    Returns a dict mapping each distinct ``TypeText`` value (in order of first
    appearance) to the matching rows, each re-indexed from 0.
    """
    type_column = df['TypeText']
    return {
        label: df.loc[type_column == label].reset_index(drop=True)
        for label in type_column.unique()
    }

In [14]:
# Read the raw event log from CSV and immediately convert it to the
# (TypeText, Date) form produced by process_event_log_dataframe.
df = process_event_log_dataframe(
    pd.read_csv('data/EventLogData-WarbyParker.csv')
)

In [15]:
# Display the processed event log (rows run earliest-first after the reversal
# done in process_event_log_dataframe).
df

Unnamed: 0,TypeText,Date
0,start,2021-07-03 15:54:21.960
1,exit,2021-07-03 15:54:41.050
2,exit,2021-07-03 15:54:41.520
3,entrance,2021-07-03 15:55:35.270
4,entrance,2021-07-03 15:55:37.160
5,entrance,2021-07-03 15:55:40.060
6,entrance,2021-07-03 15:55:47.020
7,entrance,2021-07-03 15:55:47.310
8,entrance,2021-07-03 15:56:54.210
9,exit,2021-07-03 15:58:15.810


In [20]:
# One DataFrame per distinct TypeText value, keyed by that value.
dfs = split_event_dataframe_into_event_types(df)

In [82]:
# Observation window: first 'start' event to first 'stop' event.
start_time = dfs['start']['Date'][0]
stop_time = dfs['stop']['Date'][0]

# Hard-coded inputs to the estimate.
start_n_people = 9  # head count assumed present at start_time
alpha = 0.7         # weight given to exits (vs. entrances) in the denominator

n_entrances = len(dfs['entrance'])
n_exits = len(dfs['exit'])

# Accumulated person-time over the window: the initial head count for the whole
# window, plus the time from each entrance to stop_time, minus the time from
# each exit to stop_time.
n_integral = (
    start_n_people * (stop_time - start_time)
    + (stop_time - dfs['entrance']['Date']).sum()
    - (stop_time - dfs['exit']['Date']).sum()
)

# Person-time divided by an alpha-weighted blend of the exit and entrance counts.
est_mean_time = n_integral / (alpha * n_exits + (1 - alpha) * n_entrances)
est_mean_time

Timedelta('0 days 00:06:51.529182879')

In [79]:
# Gaps between consecutive entrance events, expressed in seconds.
np.diff(dfs['entrance']['Date'].to_numpy()) / np.timedelta64(1, 's')

array([  1.89,   2.9 ,   6.96,   0.29,  66.9 , 158.93,   0.46, 272.3 ,
        10.22,   2.49,   0.7 ,   3.19, 114.82,   1.72,   0.45,  76.34,
         0.53,  38.68,   2.98,   0.53,  90.42,  98.42,   0.71,  72.51])

In [92]:
# Entrance rate (entrances per minute over the observation window) multiplied
# by the estimated mean time in minutes. Uses est_mean_time directly instead of
# a hand-copied "6 min 51.529 s" literal, so the value follows any change to
# the data, alpha, or start_n_people.
n_entrances / ((stop_time - start_time) / np.timedelta64(1, 'm')) * (est_mean_time / np.timedelta64(1, 'm'))

9.183127443454666

In [93]:
# The initial-head-count term of n_integral on its own, divided by the same
# alpha-weighted exit/entrance count used for est_mean_time.
start_n_people * (stop_time - start_time) / (alpha * n_exits + (1 - alpha) * n_entrances)

Timedelta('0 days 00:06:32.336964980')