In [None]:
import pandas as pd
import datetime
import calendar

In [None]:
def generate_weekday_count(data):
    """Tally how many rows fall on each value of the 'day of week' column.

    Parameters
    ----------
    data : mapping or pandas.DataFrame
        Anything where ``data['day of week']`` is iterable.

    Returns
    -------
    dict
        Maps each distinct 'day of week' value to its number of
        occurrences, keyed in first-seen order.
    """
    tally = {}
    for day in data['day of week']:
        # Start unseen days at zero, then bump by one.
        tally[day] = tally.get(day, 0) + 1
    return tally

In [None]:
def generate_stats(activities_data, by='day of week'):
    """Compute per-group mean, standard deviation and +/- 1, 2, 3 sigma bands.

    Parameters
    ----------
    activities_data : pandas.DataFrame
        Must contain the grouping column(s) and exactly one value column;
        the aggregated frame is renamed to a single 'avg' column, which
        raises if more than one value column remains.
    by : str, default 'day of week'
        'day of week' groups on that column and keeps it as the index.
        Any other value groups on ['gap', 'time'], moves the keys back
        into columns and keeps only the rows whose 'gap' is False.

    Returns
    -------
    pandas.DataFrame
        Columns 'avg', 'std', 'avg+std', 'avg-std', 'avg+2std', 'avg-2std',
        'avg+3std', 'avg-3std' (preceded by 'gap' and 'time' in the
        non-'day of week' mode).
    """
    group_keys = ['day of week'] if by == 'day of week' else ['gap', 'time']
    grouped = activities_data.groupby(group_keys)

    results = grouped.mean()
    results.columns = ['avg']
    results['std'] = grouped.std()

    if by != 'day of week':
        results = results.reset_index()
        # Explicit copy: the boolean filter returns a derived frame, and
        # adding columns to it would otherwise raise SettingWithCopyWarning.
        results = results[results['gap'].eq(False)].copy()

    for multiple, label in ((1, 'std'), (2, '2std'), (3, '3std')):
        results['avg+' + label] = results['avg'] + multiple * results['std']
        results['avg-' + label] = results['avg'] - multiple * results['std']

    # Only the lowest band is floored at zero, matching the original code.
    # NOTE(review): 'avg-std' and 'avg-2std' may still be negative - confirm
    # whether they should be floored as well.
    results['avg-3std'] = results['avg-3std'].clip(lower=0)
    return results

In [None]:
def generate_score(activities_threshold):
    """Flag each row's 'all daily count' against its threshold columns.

    Parameters
    ----------
    activities_threshold : pandas.DataFrame
        Must contain 'all daily count', 'avg' and the band columns
        'avg+std', 'avg-std', 'avg+2std', 'avg-2std', 'avg+3std', 'avg-3std'.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with boolean columns '>avg', '<avg',
        '>avg+std', '<avg-std', '>avg+2std', '<avg-2std', '>avg+3std',
        '<avg-3std' appended in that order.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    results = activities_threshold.copy()
    counts = results['all daily count']

    # '>avg' is inclusive (>=); '<avg' is defined as its exact negation.
    results['>avg'] = counts >= results['avg']
    results['<avg'] = ~results['>avg']

    # Both band comparisons are inclusive on the threshold value.
    for band in ('std', '2std', '3std'):
        results['>avg+' + band] = counts >= results['avg+' + band]
        results['<avg-' + band] = counts <= results['avg-' + band]
    return results

In [None]:
def run_analysis(filename, write_data=False, output_filename='analyzed_data.xlsx'):
    """Run the day-of-week and hourly statistics on a processed workbook.

    Reads the 'all_daily_activities' and 'all_hourly_activities' sheets,
    computes the per-day-of-week statistics, merges them back onto the daily
    rows to flag threshold crossings, and computes the hourly statistics.

    Parameters
    ----------
    filename : str or path-like
        Excel workbook containing the two sheets named above.
    write_data : bool, default False
        When True, write the three result tables to ``output_filename``.
    output_filename : str or path-like, default 'analyzed_data.xlsx'
        Destination workbook used when ``write_data`` is True.

    Returns
    -------
    None
        Results are only persisted through the optional Excel output.
    """
    # Only these two sheets feed the analysis; passing a list reads them in
    # one call and returns a dict keyed by sheet name.
    sheets = pd.read_excel(
        filename,
        sheet_name=['all_daily_activities', 'all_hourly_activities'],
    )
    all_daily_activities = sheets['all_daily_activities']
    all_hourly_activities = sheets['all_hourly_activities']

    all_day_of_week_stat = generate_stats(all_daily_activities, by='day of week')
    # The stats frame is indexed by 'day of week', so the merge key resolves
    # against that index level on the right-hand side.
    std_thresholds = all_daily_activities.merge(
        all_day_of_week_stat, on=['day of week'], how='left'
    )
    threshold_detections = generate_score(std_thresholds)

    all_hourly_stat = generate_stats(all_hourly_activities, by='hourly')

    if write_data:
        with pd.ExcelWriter(output_filename) as writer:
            # The day-of-week table keeps its index (the weekday labels).
            all_day_of_week_stat.to_excel(writer, sheet_name='all_day_of_week_stat')
            all_hourly_stat.to_excel(writer, sheet_name='all_hourly_stat', index=False)
            threshold_detections.to_excel(writer, sheet_name='detected_results', index=False)
    return

In [None]:
# Run the full pipeline on the processed workbook and persist the results.
filename = 'processed_data.xlsx'
run_analysis(filename=filename, write_data=True)

#### Test Script

In [None]:
# Step-by-step rerun of the pipeline that leaves every sheet and intermediate
# result in a module-level variable.
filename = 'processed_data.xlsx'

# Open the workbook once and reuse the handle for every sheet read.
with pd.ExcelFile(filename) as workbook:
    data = pd.read_excel(workbook, sheet_name='data')
    hour_table = pd.read_excel(workbook, sheet_name='hour_table')
    date_table = pd.read_excel(workbook, sheet_name='date_table')
    all_daily_activities = pd.read_excel(workbook, sheet_name='all_daily_activities')
    # .ffill() replaces the deprecated fillna(method='ffill') spelling.
    ind_daily_activities = pd.read_excel(workbook, sheet_name='ind_daily_activities').ffill()
    all_hourly_activities = pd.read_excel(workbook, sheet_name='all_hourly_activities')
    ind_hourly_activities = pd.read_excel(workbook, sheet_name='ind_hourly_activities').ffill()

all_day_of_week_stat = generate_stats(all_daily_activities, by='day of week')
std_thresholds = all_daily_activities.merge(all_day_of_week_stat, on=['day of week'], how='left')
threshold_detections = generate_score(std_thresholds)

all_hourly_stat = generate_stats(all_hourly_activities, by='hourly')