In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

In [2]:
filename = Path('./data/processed/20210818_evening_staff_activities.csv')
df_staff_activities = pd.read_csv(filename, parse_dates=['start_time', 'end_time'])

dtypes = {'group_size': 'int', 'fully_captured': 'bool', 'visit_time': 'float'}
filename = Path('./data/processed/20210818_evening_guest_demand.csv')
df_guest_demand = pd.read_csv(filename, dtype=dtypes, parse_dates=['start_time', 'end_time'])

In [3]:
import datetime

measurement_pause = (datetime.datetime(2021, 8, 18, 18, 59), datetime.datetime(2021, 8, 18, 20, 4))

measurement_pause

In [23]:
from collections import defaultdict
import datetime

activities_per_table = defaultdict(list)
last_actvity_processed = {}

for _, row in df_guest_demand.iterrows():
    table_id = row['table_id']
    start_time = row['start_time']
    end_time = row['end_time']

    if table_id not in last_actvity_processed:
        df_actvities_gap = df_staff_activities[(df_staff_activities['table_id'] == table_id) & (df_staff_activities['start_time'] < start_time)]
    else:
        df_actvities_gap = df_staff_activities[(df_staff_activities['table_id'] == table_id) & (df_staff_activities['start_time'] < start_time) & (df_staff_activities['start_time'] > last_actvity_processed[table_id])]

    if len(df_actvities_gap) > 0:
        activities_per_table[table_id].append((False,df_actvities_gap))
            
    if pd.isnull(end_time):
        df_activities = df_staff_activities[(df_staff_activities['table_id'] == table_id) & (df_staff_activities['start_time'] >= start_time)]
    else:   
        df_activities = df_staff_activities[(df_staff_activities['table_id'] == table_id) & (df_staff_activities['start_time'] >= start_time) & (df_staff_activities['end_time'] <= end_time)]
        
    df_activities_before = df_activities[df_activities['start_time'] <= measurement_pause[0]]
    df_activities_after = df_activities[df_activities['start_time'] > measurement_pause[0]]
    
    if len(df_activities) != len(df_activities_before) + len(df_activities_after):
        raise Exception('Missing')
    
    if len(df_activities) == 0:
        print(f'{table_id} ({start_time} - {end_time}) has no activities')
        continue
    
    if len(df_activities_before) > 0:    
        activities_per_table[table_id].append((True, df_activities_before))
        if len(df_activities_after) == 0:
            last_actvity_processed[table_id] = df_activities_before['start_time'].iloc[-1]
            
    if len(df_activities_after) > 0:    
        activities_per_table[table_id].append((True, df_activities_after))
        last_actvity_processed[table_id] = df_activities_after['start_time'].iloc[-1]
        
for table_id, end_time in last_actvity_processed.items():
    df_actvities_gap = df_staff_activities[(df_staff_activities['table_id'] == table_id) & (df_staff_activities['start_time'] > end_time)]
    if len(df_actvities_gap) > 0:
        activities_per_table[table_id].append((False, df_actvities_gap))
       
    

14 (2021-08-18 17:12:11 - 2021-08-18 17:37:41) has no activities
20 (2021-08-18 18:28:32 - 2021-08-18 19:20:30) has no activities
33 (2021-08-18 18:11:12 - 2021-08-18 20:13:42) has no activities
23 (2021-08-18 17:33:44 - NaT) has no activities
B3 (2021-08-18 20:10:07 - NaT) has no activities
33 (2021-08-18 20:54:24 - NaT) has no activities


In [21]:
list(activities_per_table.keys())

['15',
 '16',
 '54',
 '56',
 'B2',
 '12',
 'B3',
 '11',
 'B1',
 '31',
 '36',
 '52',
 '17',
 '13',
 '35',
 '14',
 '55',
 '53',
 '51',
 '20']

Can be used for non-guest activity count:
- 15 (One error)
- 54
- 56
- B2
- B1
- 31
- 36
- 52


In [80]:
activities_per_table['20']

[(True,
      table_id staff_id          start_time            end_time activity_type
  290       20       HM 2021-08-18 20:02:02 2021-08-18 20:04:24             S
  292       20        Q 2021-08-18 20:04:09 2021-08-18 20:04:21             S
  293       20       HM 2021-08-18 20:04:39 2021-08-18 20:05:05             S
  301       20        A 2021-08-18 20:12:09 2021-08-18 20:12:23             S
  302       20        A 2021-08-18 20:12:45 2021-08-18 20:12:52       CULTERY
  303       20        O 2021-08-18 20:13:00 2021-08-18 20:13:09       CULTERY
  308       20       AO 2021-08-18 20:17:54 2021-08-18 20:18:36             S
  310       20        H 2021-08-18 20:18:27 2021-08-18 20:18:54             S
  313       20        H 2021-08-18 20:19:34 2021-08-18 20:19:49             S
  319       20       AO 2021-08-18 20:25:50 2021-08-18 20:26:04             S
  324       20        O 2021-08-18 20:30:24 2021-08-18 20:30:45             R
  327       20        Q 2021-08-18 20:33:35 2021-08-18 2

In [45]:
df_staff_activities['activity_type'] = df_staff_activities['activity_type'].str.upper()
pd.unique(df_staff_activities['activity_type'])

array(['S', 'C', 'W', 'O', 'S-O', 'CUTLERY', 'S-R', 'R', 'W-O', 'R-O',
       'F', 'C ', 'CUTLERY - O', 'F-O', 'F-R', 'S-O-R', 'P', 'B',
       'GUIDE TO TABLE', 'O-R', 'R-F', 'F-R-O', 'S-F', 'R&P', 'F&O',
       'F&R', 'CULTERY', 'P&R', 'F&P', 'S&R', 'S&O', 'F&R&P'],
      dtype=object)

In [61]:
activity_types = {
    'base_first': ('S', 'CUTLERY', 'B', 'W'),
    'base_after': ('R', 'C'),
    'no_base': ('F', 'O', 'P')
}

activity_type_count = {key:0 for key in activity_types}

check = None

for key, activity_types_of_base_type in activity_types.items():
    for activity_type in activity_types_of_base_type:
        contains_activity = df_staff_activities['activity_type'].str.contains(activity_type)
        activity_type_count[key] += len(df_staff_activities[contains_activity == True])
        if check is None:
            check = contains_activity
        else:
            check = check | contains_activity

print(activity_type_count)
count_sum = sum(activity_type_count.values())
print({key:value/count_sum for key, value in activity_type_count.items()})
df_staff_activities[check == False]
        

{'base_first': 191, 'base_after': 137, 'no_base': 141}
{'base_first': 0.4072494669509595, 'base_after': 0.2921108742004264, 'no_base': 0.3006396588486141}


Unnamed: 0,table_id,staff_id,start_time,end_time,activity_type


In [50]:
np.

157