# 4X Study Dashboard
This notebook is used to analyze data from the 4X study conducted in March 2018.

# Imports and Global Setup

In [317]:
# data processing
import math
import json
import itertools
from multiprocessing import Pool, cpu_count
from multiprocessing.dummy import Pool as ThreadPool 
from functools import reduce
from collections import Counter

import requests
import pandas as pd
import numpy as np
from scipy import stats

from datetime import datetime, timezone
from copy import deepcopy
from tqdm import tqdm_notebook as tqdm

In [318]:
# google
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from operator import itemgetter

In [319]:
# google maps
import os
import gmaps
import gmaps.datasets

gmaps.configure(api_key=os.environ['GMAPS'])

In [320]:
# plotting
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline

In [321]:
# palette
sns.set(font_scale=1.5, style='whitegrid')
# sns.set_palette("cubehelix")
sns.set_palette(sns.cubehelix_palette(rot=-.4))

# Load in data

In [322]:
# URLs for different conditions
url = 'https://les-expand.herokuapp.com/parse/classes/'

# shared header and data
header = {'X-Parse-Application-Id': 'PkngqKtJygU9WiQ1GXM9eC0a17tKmioKKmpWftYr'}
data = {'limit': '10000'}

# study start and end
start_time = '2018-08-27 05:00:00'
end_time = '2018-09-10 05:00:00'
print('Study Length: {}'.format(datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S') - datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')))

Study Length: 14 days, 0:00:00


In [323]:
def load_data(base_url, header, data, start_time, end_time):
    """
    Loads in all needed tables from database, given url.
    
    Input: 
        base_url (string): url to pull data from
        header (dict): application id and other auth
        data (dict): data to pass into query
        start_time (datetime): start time for data
        end_time (datetime): end time for data 
    
    Return:
        (dict): dict where keys are collection names and values are Pandas objects containing data
    """
    # declare collection list
    collection_list = ['_User', 'ServerLog', 'DebugLog', 'ForYouViewLog', 'ApplicationHeartbeat',
                       'TaskLocations', 'LocationUpdates', 'LocationTypeMetadata', 'beacons', 'EnRouteLocations',
                       'AtLocationNotificationsSent', 'AtLocationNotificationResponses',
                       'EnRouteNotificationsSent', 'EnRouteNotificationResponses',
                       'AtDistanceNotificationsSent', 'AtDistanceNotificationResponses']
    
    # loop through and load data for each collection
    output = {}
    for collection in tqdm(collection_list):
        # check if location updates
        if collection == 'LocationUpdates':
            curr_data = {'limit': '30000', 'order': '-createdAt'}
            current_response = requests.get(base_url + collection, headers=header, data=curr_data)
        else:
            current_response = requests.get(base_url + collection, headers=header, data=data)

        current_data = pd.DataFrame(current_response.json()['results'])
        if len(current_data) != 0 and collection not in ['LocationTypeMetadata', 'EnRouteLocations']:
            current_data['createdAt'] = pd.to_datetime(current_data['createdAt'])
            current_data['updatedAt'] = pd.to_datetime(current_data['updatedAt'])
            
            if collection != '_User':
                current_data = current_data[(current_data['createdAt'] >= start_time) & (current_data['createdAt'] < end_time)]

        output[collection] = current_data
    
    return output

def load_data_parallel(url):
    return load_data(url, header, data, start_time, end_time)

In [324]:
# fetch log data
raw_data = load_data(url, header, data, start_time, end_time)

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))




# Data Setup
This section of the notebook is used to monitor the data coming in from the study. Some measures we see here may be used within the paper

## Common Functions and Data

In [325]:
def get_merged_at_location(tasklocations, atlocnotif, atlocresp):
    """
    Sets up a Pandas DataFrame with (1) TaskLocation, (2) NotificationSent, and (3) NotificationResponse data
    merged together for AtLocation case.
    
    Input:
        tasklocations (DataFrame): DataFrame of TaskLocations
        atlocnotif (DataFrame): DataFrame of AtLocationNotificationsSent
        atlocresp (DataFrame): DataFrame of AtLocationNotificationResponses
    
    Return:
        (DataFrame): merged DataFrame of inputs
    """
    # get AtLocationNotifications without duplicates
    atlocnotif.drop_duplicates(subset=['taskLocationId', 'vendorId'], keep='last', inplace=True)
    atlocnotif.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)

    # get AtLocationNotificationResponses without duplicates
    atlocresp.drop_duplicates(subset=['taskLocationId', 'vendorId'], keep='last', inplace=True)
    atlocresp.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)
    
    # combine AtLocation notifications and responses, with some data from TaskLocations
    atloc = atlocnotif.merge(tasklocations[['objectId', 'locationType', 'locationName', 'beaconId']],
                             how='inner', left_on='taskLocationId', right_on='objectId')
    atloc = atloc.merge(atlocresp[['question', 'response', 'timestamp', 'taskLocationId', 'vendorId']],
                        how='left', on=['taskLocationId', 'vendorId'])
    
    # clean columns
    del atloc['objectId']
    atloc.rename(columns={'timestamp_x': 'notificationTimestamp', 'timestamp_y': 'responseTimestamp'},
                 inplace=True)
    
    # fill blank columns
    atloc[['question', 'response']] = atloc[['question', 'response']].fillna(value='Missed Notification')
    atloc[['distanceToLocation', 'responseTimestamp']] = atloc[['distanceToLocation', 'responseTimestamp']].fillna(value=-1)

    # type columns
    atloc_int_cols = ['gmtOffset','notificationTimestamp', 'responseTimestamp']
    atloc[atloc_int_cols] = atloc[atloc_int_cols].apply(lambda x: x.astype(np.int64))
    
    # add remappedResponses column 
    invalid_responses = ['I don\'t know', 'com.apple.UNNotificationDismissActionIdentifier', 'Missed Notification']
    atloc['remappedResponses'] = atloc['response']
    atloc.loc[~atloc['remappedResponses'].isin(invalid_responses), 'remappedResponses'] = 'Valid Response'
    atloc.loc[atloc['remappedResponses'] == 'com.apple.UNNotificationDismissActionIdentifier', 'remappedResponses'] = 'Dismissed Notification'
    atloc.loc[atloc['remappedResponses'] == 'I don\'t know', 'remappedResponses'] = '"I don\'t know" Response'
    
    # reorder columns
    atloc_col_ordering = ['taskLocationId', 'vendorId', 'beaconId', 'distanceToLocation',
                          'locationType', 'locationName','gmtOffset', 'notificationTimestamp', 'notificationString',
                          'question', 'response', 'remappedResponses', 'responseTimestamp']
    atloc = atloc[atloc_col_ordering]
    
    return atloc

In [326]:
def get_merged_at_distance(tasklocations, atdistnotif, atdistresp):
    """
    Sets up a Pandas DataFrame with (1) TaskLocation, (2) NotificationSent, and (3) NotificationResponse data
    merged together for AtDistance case.
    
    Input:
        tasklocations (DataFrame): DataFrame of TaskLocations
        atdistnotif (DataFrame): DataFrame of AtDistanceNotificationsSent
        atdistresp (DataFrame): DataFrame of AtDistanceNotificationResponses
    
    Return:
        (DataFrame): merged DataFrame of inputs
    """
    # get AtDistanceNotifications without duplicates
    atdistnotif.drop_duplicates(subset=['taskLocationId', 'vendorId'], keep='last', inplace=True)
    atdistnotif.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)

    # get AtDistanceNotificationResponses without duplicates
    atdistresp.drop_duplicates(subset=['taskLocationId', 'vendorId'], keep='last', inplace=True)
    atdistresp.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)
    
    # combine AtDistance notifications and responses, with some data from TaskLocations
    atdist = atdistnotif.merge(tasklocations[['objectId', 'beaconId', 'locationName']],
                               how='inner', left_on='taskLocationId', right_on='objectId')
    atdist = atdist.merge(atdistresp[['emaResponse', 'timestamp', 'taskLocationId', 'vendorId']],
                          how='left', on=['taskLocationId', 'vendorId'])
    
    # clean columns
    del atdist['objectId']
    atdist.rename(columns={'timestamp_x': 'notificationTimestamp', 'timestamp_y': 'responseTimestamp'}, inplace=True)

    atdist_col_ordering = ['taskLocationId', 'vendorId', 'beaconId', 'distanceToLocation', 'bearingToLocation',
                           'locationType', 'locationName', 'notificationDistance', 'sentBy', 'infoIncluded',
                           'gmtOffset', 'notificationTimestamp', 'emaResponse', 'responseTimestamp']
    atdist = atdist[atdist_col_ordering]
    
    # fill blank columns
    atdist['emaResponse'] = atdist['emaResponse'].fillna(value='Missed Notification')
    atdist['responseTimestamp'] = atdist['responseTimestamp'].fillna(value=-1)
    
    # remap columns
    atdist.loc[atdist['emaResponse'] == 'com.apple.UNNotificationDismissActionIdentifier', 'emaResponse'] = 'Dismissed Notification'

    # type columns
    atdist_int_cols = ['gmtOffset','notificationTimestamp', 'responseTimestamp']
    atdist[atdist_int_cols] = atdist[atdist_int_cols].apply(lambda x: x.astype(np.int64))
    
    return atdist

In [327]:
def get_merged_en_route(enroutelocations, enroutenotif, enrouteresp):
    """
    Sets up a Pandas DataFrame with (1) EnRouteLocations, (2) NotificationSent, and (3) NotificationResponse data
    merged together for EnRoute case.
    
    Input:
        enroutelocations (DataFrame): DataFrame of EnRouteLocations
        enroutenotif (DataFrame): DataFrame of EnRouteNotificationsSent
        enrouteresp (DataFrame): DataFrame of EnRouteNotificationResponses
    
    Return:
        (DataFrame): merged DataFrame of inputs
    """
    # get EnRouteNotifications without duplicates
    enroutenotif.drop_duplicates(subset=['enRouteLocationId', 'vendorId'], keep='last', inplace=True)
    enroutenotif.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)

    # get AtDistanceNotificationResponses without duplicates
    enrouteresp.drop_duplicates(subset=['enRouteLocationId', 'vendorId'], keep='last', inplace=True)
    enrouteresp.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)
    
    # combine EnRouteNotifications and responses, with some data from EnRouteLocations
    enroute = enroutenotif.merge(enroutelocations[['objectId', 'locationName', 'locationType']],
                               how='inner', left_on='enRouteLocationId', right_on='objectId')
    enroute = enroute.merge(enrouteresp[['questionResponse', 'timestamp', 'enRouteLocationId', 'vendorId']],
                            how='left', on=['enRouteLocationId', 'vendorId'])
    
    # clean columns
    del enroute['objectId']
    enroute.rename(columns={'timestamp_x': 'notificationTimestamp', 'timestamp_y': 'responseTimestamp'}, inplace=True)

    enroute_col_ordering = ['enRouteLocationId', 'vendorId', 'distanceToLocation', 'locationType', 'locationName',
                           'gmtOffset', 'notificationTimestamp', 'questionResponse', 'responseTimestamp']
    enroute = enroute[enroute_col_ordering]
    
    # fill blank columns
    enroute['questionResponse'] = enroute['questionResponse'].fillna(value='Missed Notification')
    enroute.loc[enroute['questionResponse'] == 'com.apple.UNNotificationDismissActionIdentifier', 'questionResponse'] = 'Dismissed Notification'
    
    enroute['responseTimestamp'] = enroute['responseTimestamp'].fillna(value=-1)
    
     # add validResponse column 
    invalid_responses = ['I don\'t know', 'com.apple.UNNotificationDismissActionIdentifier', 'Missed Notification']
    enroute['remappedResponses'] = enroute['questionResponse']
    enroute.loc[~enroute['remappedResponses'].isin(invalid_responses), 'remappedResponses'] = 'Valid Response'
    enroute.loc[enroute['remappedResponses'] == 'I don\'t know', 'remappedResponses'] = '"I don\'t know" Response'

    # type columns
    enroute_int_cols = ['gmtOffset','notificationTimestamp', 'responseTimestamp']
    enroute[enroute_int_cols] = enroute[enroute_int_cols].apply(lambda x: x.astype(np.int64))
    
    return enroute

In [328]:
def get_dead_apps(serverlog):
    """
    Returns a list of lists for dead apps that server has notified.
    
    Input: 
        server (DataFrame): DataFrame of ServerLog
    
    Return:
        (list of lists of strings): all dead applications notified via push
    """
    notify_log_strings = serverlog[serverlog['logString'].str.contains('Notified dead')]['logString']
    deadapp_notif_list = list(notify_log_strings.apply(lambda x: x[x.find('[') + 1:-1].split(', ')))
    return deadapp_notif_list

In [329]:
location_scaffolds = {}
for index, row in raw_data['LocationTypeMetadata'].iterrows():
    location_scaffolds[row['locationType']] = row['scaffold']

## Data Setup

In [330]:
serverlog = deepcopy(raw_data['ServerLog'])
deadapp_notify = get_dead_apps(serverlog)
print('Last dead apps notified (count = {}): \n{}'.format(len(deadapp_notify[-1]),
                                                          '\n'.join(deadapp_notify[-1])))

flattened_deadapps = reduce(lambda x, y: x + y, deadapp_notify, [])
Counter(flattened_deadapps)

Last dead apps notified (count = 5): 
0DD045E1-0166-461B-B8CF-EA57DBFB5AAD
8E710E79-A370-42B9-9D2C-ECA8623C1F3B
D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3
9A9310F7-B69A-414F-803D-CF2153014C25
32019C1D-6090-44C7-8746-E2E3C5C79229


Counter({'': 2,
         '5899504E-1461-48DE-9ACC-FB9F2A1FDAF8': 2,
         'D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3': 3,
         '0DD045E1-0166-461B-B8CF-EA57DBFB5AAD': 3,
         '8E710E79-A370-42B9-9D2C-ECA8623C1F3B': 3,
         'EB73A542-9446-4AFC-BC03-09FA3DA71CEA': 1,
         'E2DAC389-DD64-4AF8-934A-6D1EF7D68507': 1,
         '32019C1D-6090-44C7-8746-E2E3C5C79229': 2,
         'AB80C727-EACF-437C-8460-735DEB25361E': 1,
         '9A9310F7-B69A-414F-803D-CF2153014C25': 1})

In [331]:
users = deepcopy(raw_data['_User'])
users = users[users['vendorId'] != '']

tasklocations = deepcopy(raw_data['TaskLocations'])
enroutelocations = deepcopy(raw_data['EnRouteLocations'])

atlocnotif = deepcopy(raw_data['AtLocationNotificationsSent'])
atlocresp = deepcopy(raw_data['AtLocationNotificationResponses'])

atdistnotif = deepcopy(raw_data['AtDistanceNotificationsSent'])
atdistresp = deepcopy(raw_data['AtDistanceNotificationResponses'])

enroutenotif = deepcopy(raw_data['EnRouteNotificationsSent'])
enrouteresp = deepcopy(raw_data['EnRouteNotificationResponses'])

foryou = deepcopy(raw_data['ForYouViewLog'])

location_updates = deepcopy(raw_data['LocationUpdates'])

print('4X | User Count: {}'.format(len(users)))
print('4X | At location notifications: {}, At location responses: {}'.format(len(atlocnotif), len(atlocresp)))
print('4X | At distance notifications: {}, At distance responses: {}'.format(len(atdistnotif), len(atdistresp)))
print('4X | En route notifications: {}, En route responses: {}'.format(len(enroutenotif), len(enrouteresp)))

4X | User Count: 19
4X | At location notifications: 345, At location responses: 107
4X | At distance notifications: 170, At distance responses: 83
4X | En route notifications: 391, En route responses: 27


In [332]:
# exclude kapil and rob
user_exclude_ids = [
    '20E1994C-9296-466F-B8FB-B5804C1C2121', # kapil
    '88991A9A-2302-4359-B8AE-4E2F2505E6AE', # rob
    '' # random blank id
]

users = users[~users['vendorId'].isin(user_exclude_ids)]

atlocnotif = atlocnotif[~atlocnotif['vendorId'].isin(user_exclude_ids)]
atlocresp = atlocresp[~atlocresp['vendorId'].isin(user_exclude_ids)]

atdistnotif = atdistnotif[~atdistnotif['vendorId'].isin(user_exclude_ids)]
atdistresp = atdistresp[~atdistresp['vendorId'].isin(user_exclude_ids)]

enroutenotif = enroutenotif[~enroutenotif['vendorId'].isin(user_exclude_ids)]
enrouteresp = enrouteresp[~enrouteresp['vendorId'].isin(user_exclude_ids)]

foryou = foryou[~foryou['vendorId'].isin(user_exclude_ids)]

location_updates = location_updates[~location_updates['vendorId'].isin(user_exclude_ids)]

print('4X | User Count: {}'.format(len(users)))
print('4X | At location notifications: {}, At location responses: {}'.format(len(atlocnotif), len(atlocresp)))
print('4X | At distance notifications: {}, At distance responses: {}'.format(len(atdistnotif), len(atdistresp)))
print('4X | En route notifications: {}, En route responses: {}'.format(len(enroutenotif), len(enrouteresp)))

4X | User Count: 17
4X | At location notifications: 227, At location responses: 104
4X | At distance notifications: 140, At distance responses: 81
4X | En route notifications: 391, En route responses: 27


In [333]:
# merged data frame with all AtLocation data
atloc = get_merged_at_location(deepcopy(tasklocations),
                                  deepcopy(atlocnotif),
                                  deepcopy(atlocresp))

In [334]:
# merged data frame with all AtDistance data
atdist = get_merged_at_distance(deepcopy(tasklocations),
                                   deepcopy(atdistnotif),
                                   deepcopy(atdistresp))
atdist = atdist[atdist['infoIncluded'] == True] # 4X Only: remove cases without info

In [335]:
# eXpand providing more data at location overall
atdist_overall_moreinfo = atdist[atdist['emaResponse'].isin(["Yes! This info is useful. I'm going to go there.", "Yes. This info is useful but I'm already going there."])]
atdist_overall_moreinfo = atdist_overall_moreinfo.merge(atloc, how='left', on=['vendorId', 'taskLocationId'])
atdist_overall_moreinfo['remappedResponses'].fillna(value='Did Not Go', inplace=True)
atdist_overall_moreinfo

Unnamed: 0,taskLocationId,vendorId,beaconId_x,distanceToLocation_x,bearingToLocation,locationType_x,locationName_x,notificationDistance,sentBy,infoIncluded,...,distanceToLocation_y,locationType_y,locationName_y,gmtOffset_y,notificationTimestamp_y,notificationString,question,response,remappedResponses,responseTimestamp_y
0,vfH4ECgIIY,D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3,rIzJ4nI1lp,266.317099,279.773872,gym,SPAC,300,location updates,True,...,,,,,,,,,Did Not Go,
1,42gjPJQ85x,AB80C727-EACF-437C-8460-735DEB25361E,,164.300292,227.788553,freefood,Ford Ground Floor (by staircase),300,location updates,True,...,,,,,,,,,Did Not Go,
2,45czWkNezu,600C7D5C-EF49-4C95-B0D1-135DBBE0BE5C,tfTCPUaMUD,177.731929,181.142513,workspace,Mudd Common Area (outside library),300,location updates,True,...,,,,,,,,,Did Not Go,
3,IeRHOjPCB0,AB80C727-EACF-437C-8460-735DEB25361E,,87.094625,201.42926,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,27.368637,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535570000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535570000.0
4,IeRHOjPCB0,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.05708,239.268627,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,19.360045,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535572000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",specificfood,ice cream,Valid Response,1535572000.0
5,tDgskz29QX,25FF4B97-71BF-4BB3-A701-A6937D8DDF9A,rIzJ4nI1lp,290.385359,23.101608,gym,SPAC,300,location updates,True,...,,,,,,,,,Did Not Go,
6,qEkg5qqzH6,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.375498,300.020887,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,28.646216,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535660000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535660000.0
7,qEkg5qqzH6,AB80C727-EACF-437C-8460-735DEB25361E,,85.210044,95.501341,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,,,,,,,,,Did Not Go,
8,qEkg5qqzH6,E2DAC389-DD64-4AF8-934A-6D1EF7D68507,,85.939565,282.469459,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,22.316348,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535665000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",typefood,vanilla,Valid Response,1535665000.0
9,4qpJVC2lxy,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,107.282775,73.185908,freefood,Tech Ground Floor Lobby,300,location updates,True,...,14.473572,freefood,Tech Ground Floor Lobby,-18000.0,1535743000.0,"Notified for 4qpJVC2lxy (42.057808, -87.676126...",specificfood,bags of nuts/chips,Valid Response,1535743000.0


In [336]:
atdist_overall_moreinfo['time_diff_seconds'] = (atdist_overall_moreinfo['notificationTimestamp_y'].fillna(0) - atdist_overall_moreinfo['responseTimestamp_x']).astype(int)
atdist_overall_moreinfo['time_diff_minutes'] = atdist_overall_moreinfo['time_diff_seconds'] / 60
atdist_overall_moreinfo

Unnamed: 0,taskLocationId,vendorId,beaconId_x,distanceToLocation_x,bearingToLocation,locationType_x,locationName_x,notificationDistance,sentBy,infoIncluded,...,locationName_y,gmtOffset_y,notificationTimestamp_y,notificationString,question,response,remappedResponses,responseTimestamp_y,time_diff_seconds,time_diff_minutes
0,vfH4ECgIIY,D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3,rIzJ4nI1lp,266.317099,279.773872,gym,SPAC,300,location updates,True,...,,,,,,,Did Not Go,,-1535410828,-25590180.0
1,42gjPJQ85x,AB80C727-EACF-437C-8460-735DEB25361E,,164.300292,227.788553,freefood,Ford Ground Floor (by staircase),300,location updates,True,...,,,,,,,Did Not Go,,-1535489554,-25591490.0
2,45czWkNezu,600C7D5C-EF49-4C95-B0D1-135DBBE0BE5C,tfTCPUaMUD,177.731929,181.142513,workspace,Mudd Common Area (outside library),300,location updates,True,...,,,,,,,Did Not Go,,-1535570677,-25592840.0
3,IeRHOjPCB0,AB80C727-EACF-437C-8460-735DEB25361E,,87.094625,201.42926,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1535570000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535570000.0,135,2.25
4,IeRHOjPCB0,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.05708,239.268627,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1535572000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",specificfood,ice cream,Valid Response,1535572000.0,71,1.183333
5,tDgskz29QX,25FF4B97-71BF-4BB3-A701-A6937D8DDF9A,rIzJ4nI1lp,290.385359,23.101608,gym,SPAC,300,location updates,True,...,,,,,,,Did Not Go,,-1535579702,-25593000.0
6,qEkg5qqzH6,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.375498,300.020887,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1535660000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535660000.0,93,1.55
7,qEkg5qqzH6,AB80C727-EACF-437C-8460-735DEB25361E,,85.210044,95.501341,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,,,,,,,Did Not Go,,-1535660017,-25594330.0
8,qEkg5qqzH6,E2DAC389-DD64-4AF8-934A-6D1EF7D68507,,85.939565,282.469459,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1535665000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",typefood,vanilla,Valid Response,1535665000.0,214,3.566667
9,4qpJVC2lxy,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,107.282775,73.185908,freefood,Tech Ground Floor Lobby,300,location updates,True,...,Tech Ground Floor Lobby,-18000.0,1535743000.0,"Notified for 4qpJVC2lxy (42.057808, -87.676126...",specificfood,bags of nuts/chips,Valid Response,1535743000.0,170,2.833333


In [337]:
atdist_overall_moreinfo.groupby(['emaResponse', 'remappedResponses'])['remappedResponses'].count().reset_index(name='count')

Unnamed: 0,emaResponse,remappedResponses,count
0,Yes! This info is useful. I'm going to go there.,"""I don't know"" Response",2
1,Yes! This info is useful. I'm going to go there.,Did Not Go,6
2,Yes! This info is useful. I'm going to go there.,Missed Notification,1
3,Yes! This info is useful. I'm going to go there.,Valid Response,12
4,Yes. This info is useful but I'm already going...,Did Not Go,2
5,Yes. This info is useful but I'm already going...,Valid Response,1


In [338]:
# eXpand providing more data at location if deviating
atdist_deviate_moreinfo = atdist[atdist['emaResponse'].isin(["Yes! This info is useful. I'm going to go there."])]
atdist_deviate_moreinfo = atdist_deviate_moreinfo.merge(atloc, how='left', on=['vendorId', 'taskLocationId'])
atdist_deviate_moreinfo['remappedResponses'].fillna(value='Did Not Go', inplace=True)
atdist_deviate_moreinfo

Unnamed: 0,taskLocationId,vendorId,beaconId_x,distanceToLocation_x,bearingToLocation,locationType_x,locationName_x,notificationDistance,sentBy,infoIncluded,...,distanceToLocation_y,locationType_y,locationName_y,gmtOffset_y,notificationTimestamp_y,notificationString,question,response,remappedResponses,responseTimestamp_y
0,42gjPJQ85x,AB80C727-EACF-437C-8460-735DEB25361E,,164.300292,227.788553,freefood,Ford Ground Floor (by staircase),300,location updates,True,...,,,,,,,,,Did Not Go,
1,45czWkNezu,600C7D5C-EF49-4C95-B0D1-135DBBE0BE5C,tfTCPUaMUD,177.731929,181.142513,workspace,Mudd Common Area (outside library),300,location updates,True,...,,,,,,,,,Did Not Go,
2,IeRHOjPCB0,AB80C727-EACF-437C-8460-735DEB25361E,,87.094625,201.42926,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,27.368637,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535570000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535570000.0
3,IeRHOjPCB0,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.05708,239.268627,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,19.360045,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535572000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",specificfood,ice cream,Valid Response,1535572000.0
4,qEkg5qqzH6,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.375498,300.020887,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,28.646216,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535660000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535660000.0
5,qEkg5qqzH6,AB80C727-EACF-437C-8460-735DEB25361E,,85.210044,95.501341,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,,,,,,,,,Did Not Go,
6,qEkg5qqzH6,E2DAC389-DD64-4AF8-934A-6D1EF7D68507,,85.939565,282.469459,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,22.316348,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535665000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",typefood,vanilla,Valid Response,1535665000.0
7,4qpJVC2lxy,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,107.282775,73.185908,freefood,Tech Ground Floor Lobby,300,location updates,True,...,14.473572,freefood,Tech Ground Floor Lobby,-18000.0,1535743000.0,"Notified for 4qpJVC2lxy (42.057808, -87.676126...",specificfood,bags of nuts/chips,Valid Response,1535743000.0
8,4qpJVC2lxy,AB80C727-EACF-437C-8460-735DEB25361E,,79.92868,150.839819,freefood,Tech Ground Floor Lobby,300,location updates,True,...,12.76693,freefood,Tech Ground Floor Lobby,-18000.0,1535742000.0,"Notified for 4qpJVC2lxy (42.057808, -87.676126...",kindfood,savory,Valid Response,1535742000.0
9,4qpJVC2lxy,C5C548C5-3329-4DD5-9A08-C2937820E0B6,,255.266625,62.667222,freefood,Tech Ground Floor Lobby,300,location updates,True,...,25.740883,freefood,Tech Ground Floor Lobby,-18000.0,1535742000.0,"Notified for 4qpJVC2lxy (42.057808, -87.676126...",specificfood,bags of nuts/chips,Valid Response,1535745000.0


In [339]:
# merged data from with all EnRoute data
enroute = get_merged_en_route(deepcopy(enroutelocations),
                                 deepcopy(enroutenotif),
                                 deepcopy(enrouteresp))
enroute

Unnamed: 0,enRouteLocationId,vendorId,distanceToLocation,locationType,locationName,gmtOffset,notificationTimestamp,questionResponse,responseTimestamp,remappedResponses
0,tnffEhyqJZ,D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3,11.810767,bikerack,Tech Rear Bike Rack,-18000,1535411299,yes,1535411527,Valid Response
1,tnffEhyqJZ,F262544F-8C00-4362-87B2-AF8FEE66DB4D,11.162052,bikerack,Tech Rear Bike Rack,-18000,1536174661,Dismissed Notification,1536174647,Valid Response
2,tnffEhyqJZ,537DF5B3-48B8-4C86-A39F-5DE5DFEE19EC,22.177841,bikerack,Tech Rear Bike Rack,-18000,1536184313,Dismissed Notification,1536184318,Valid Response
3,tnffEhyqJZ,9A9310F7-B69A-414F-803D-CF2153014C25,12.104073,bikerack,Tech Rear Bike Rack,-18000,1536340890,yes,1536340893,Valid Response
4,Ner3yTGWzJ,600C7D5C-EF49-4C95-B0D1-135DBBE0BE5C,28.08531,bikerack,SPAC Bike Rack,-18000,1535570851,yes,1535570879,Valid Response
5,Ner3yTGWzJ,25FF4B97-71BF-4BB3-A701-A6937D8DDF9A,15.7458,bikerack,SPAC Bike Rack,-18000,1536100158,yes,1536157164,Valid Response
6,Ner3yTGWzJ,537DF5B3-48B8-4C86-A39F-5DE5DFEE19EC,24.561507,bikerack,SPAC Bike Rack,-18000,1536184537,no,1536184576,Valid Response
7,bDFfkSV5PZ,E2DAC389-DD64-4AF8-934A-6D1EF7D68507,29.218763,bikerack,Ford Bike Rack,-18000,1535664399,yes,1535664361,Valid Response
8,bDFfkSV5PZ,5C2E50F3-D8D3-4D79-AF2C-B63360D11E5A,12.036184,bikerack,Ford Bike Rack,-18000,1536085030,yes,1536085024,Valid Response
9,bDFfkSV5PZ,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,28.719212,bikerack,Ford Bike Rack,-18000,1536173153,yes,1536173157,Valid Response


# Current User Locations

In [340]:
def get_last_update_for_user(all_location_updates, target_vendor_id):
    """
    Gets the last location update for a user, given a vendor id.
    
    Input:
        all_location_updates (DataFrame): all location updatesm sorted in descending order
        target_vendor_id (string): vendor id to get data for
    
    Output:
        (dict): last location update for user
    """
    current_user_updates = all_location_updates[all_location_updates['vendorId'] == target_vendor_id]
    if len(current_user_updates) > 0:
        return dict(all_location_updates[all_location_updates['vendorId'] == target_vendor_id].iloc[0])
    
    return {
        'latitude': 0,
        'longitude': 0,
        'timestamp': 0,
        'gmtOffset': 0
    }

In [341]:
# sort and save location updates
location_updates.sort_values('createdAt', ascending=False, inplace=True)

In [342]:
# get last known location for each user
user_last_locations = []

for current_vendor_id in list(users['vendorId'].unique()):
    last_location_update = get_last_update_for_user(location_updates, current_vendor_id)
    output_dict = {
        'vendorId': current_vendor_id,
        'location': (last_location_update['latitude'], last_location_update['longitude']),
        'local_timestamp': datetime.fromtimestamp(last_location_update['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
    }
    
    user_last_locations.append(output_dict)
    
user_last_locations_df = pd.DataFrame(user_last_locations)
user_last_locations_df = user_last_locations_df.merge(users[['vendorId', 'firstName', 'lastName']])
user_last_locations_df['name'] = user_last_locations_df['firstName'] + ' ' + user_last_locations_df['lastName']
user_last_locations_df.sort_values('local_timestamp').reset_index(drop=True)

Unnamed: 0,local_timestamp,location,vendorId,firstName,lastName,name
0,1969-12-31 18:00:00,"(0, 0)",32019C1D-6090-44C7-8746-E2E3C5C79229,Weihua,Lei,Weihua Lei
1,1969-12-31 18:00:00,"(0, 0)",D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3,Simone,Bianconi,Simone Bianconi
2,1969-12-31 18:00:00,"(0, 0)",0DD045E1-0166-461B-B8CF-EA57DBFB5AAD,Hexia,Guo,Hexia Guo
3,2018-09-09 10:15:26,"(42.044888996511816, -87.67779573069726)",537DF5B3-48B8-4C86-A39F-5DE5DFEE19EC,Aaron,Stone,Aaron Stone
4,2018-09-09 14:37:13,"(41.99412885968501, -87.75289160301857)",8E710E79-A370-42B9-9D2C-ECA8623C1F3B,Amy,Angarita,Amy Angarita
5,2018-09-09 18:52:46,"(41.96524016959384, -87.67770970806525)",5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,Spencer,Carlson,Spencer Carlson
6,2018-09-09 19:42:03,"(42.03267753502679, -87.68426771410498)",EB73A542-9446-4AFC-BC03-09FA3DA71CEA,Kenton,Hicks,Kenton Hicks
7,2018-09-09 19:44:54,"(42.04139959023596, -87.68739708767194)",E2DAC389-DD64-4AF8-934A-6D1EF7D68507,Ryan,Louie,Ryan Louie
8,2018-09-09 23:21:58,"(42.045993252215325, -87.68847202789867)",AB80C727-EACF-437C-8460-735DEB25361E,Yuehan,Yao,Yuehan Yao
9,2018-09-09 23:45:34,"(42.05796781086038, -87.68290993957196)",600C7D5C-EF49-4C95-B0D1-135DBBE0BE5C,Chulin,Wang,Chulin Wang


In [343]:
user_map_markers = user_last_locations_df[['name', 'vendorId', 'location', 'local_timestamp']].to_dict('records')
user_locations = [user['location'] for user in user_map_markers]

info_box_template = """
<dl>
<dt>Name</dt><dd>{name}</dd>
<dt>VendorId</dt><dd>{vendorId}</dd>
<dt>Timestamp</dt><dd>{local_timestamp}</dd>
</dl>
"""
user_info = [info_box_template.format(**user) for user in user_map_markers]

marker_layer = gmaps.marker_layer(user_locations, info_box_content=user_info)
fig = gmaps.figure()
fig.add_layer(marker_layer)
fig

Figure(layout=FigureLayout(height='420px'))

# Preference-User Counts 
Determine how many users could potentially be notified, given the preferences they specified. We want to see how different our ordering vs. the ordering that may be determined by preference might be.

In [315]:
def compute_preference_count(pref_dict, users):
    """
    Counts the number of people who would prefer each piece of information in the scaffold.
    
    Input:
        pref_dict (dict): dictionary of location type : questions that contains scaffold structure
        users (DataFrame): users and their preferences
    
    Return:
        (dict) pref_dict with counts for each question
    """
    for index, row in users.iterrows():
        curr_preferences = row['preferences']
        
        for location_type, location_prefs in curr_preferences.items():
            for question, pref_array in location_prefs.items():
                if pref_dict[location_type][question] == '':
                    pref_dict[location_type][question] = 1 if len(pref_array) > 0 else 0
                else: 
                    pref_dict[location_type][question] += 1 if len(pref_array) > 0 else 0
    
    return pref_dict

def compute_number_interested(pref_dict, users):
    """
    Counts the number of people who are interested in knowing anything about the task location category.
    
    Input:
        pref_dict (dict): dictionary of location type : questions that contains scaffold structure
        users (DataFrame): users and their preferences
    
    Return:
        (dict) pref_dict with counts for each location category
    """
    for index, row in users.iterrows():
        curr_preferences = row['preferences']
        
        for location_type, location_prefs in curr_preferences.items():
            # check if any location prefs have at least 1 entry
            has_one = False
            for question, pref_array in location_prefs.items():
                if len(pref_array) > 0:
                    has_one = True
                    break
            
            # increase user interested if at least one preference exists
            if has_one:
                pref_dict[location_type] += 1
    
    return pref_dict

def compute_prop_interested(pref_dict, users):
    """
    Proportion of people who are interested in knowing anything about the task location category.
    
    Input:
        pref_dict (dict): dictionary of location type : questions that contains scaffold structure
        users (DataFrame): users and their preferences
    
    Return:
        (dict) pref_dict with proportion for each location category
    """
    counts_bycategory = compute_number_interested(pref_dict, users)
    n_users = len(users)
    
    for location_type, count in counts_bycategory.items(): 
        counts_bycategory[location_type] = count / n_users
    
    return counts_bycategory

In [316]:
compute_number_interested({'freefood': 0, 'coffeeshop': 0, 'workspace': 0, 'gym': 0}, users_4x)

NameError: name 'users_4x' is not defined

In [None]:
compute_preference_count(deepcopy(location_scaffolds), users_4x)

In [None]:
compute_number_interested({'freefood': 0, 'coffeeshop': 0, 'workspace': 0, 'gym': 0}, users_opp_dist)

In [None]:
compute_preference_count(deepcopy(location_scaffolds), users_opp_dist)

In [None]:
compute_number_interested({'freefood': 0, 'coffeeshop': 0, 'workspace': 0, 'gym': 0}, users_opp_loc)

In [None]:
compute_preference_count(deepcopy(location_scaffolds), users_opp_loc)

In [None]:
interest_count_4x = compute_number_interested({'freefood': 0, 'coffeeshop': 0, 'workspace': 0, 'gym': 0}, users_4x)
interest_count_opp_dist = compute_number_interested({'freefood': 0, 'coffeeshop': 0, 'workspace': 0, 'gym': 0}, users_opp_dist)
interest_count_opp_loc = compute_number_interested({'freefood': 0, 'coffeeshop': 0, 'workspace': 0, 'gym': 0}, users_opp_loc)
total_users = len(users_4x) + len(users_opp_dist) + len(users_opp_loc)

overall_counts = {x_key: x_val + interest_count_opp_dist[x_key] + interest_count_opp_loc[x_key]
                  for x_key, x_val in interest_count_4x.items()}
overall_prop = {x_key: x_val / total_users for x_key, x_val in overall_counts.items()}

In [None]:
overall_prop

In [None]:
def count_prefs(all_pref_list):
    output = {}
    for pref in all_pref_list:
        if pref in output:
            output[pref] += 1
        else:
            output[pref] = 1
    
    return output

In [None]:
food_prefs_4x = [food_type for preference in users_4x['preferences'].tolist() for food_type in preference['freefood']['foodtype']]
food_prefs_opp_dist = [food_type for preference in users_opp_dist['preferences'].tolist() for food_type in preference['freefood']['foodtype']]
food_prefs_opp_loc = [food_type for preference in users_opp_loc['preferences'].tolist() for food_type in preference['freefood']['foodtype']]
total_users = len(users_4x) + len(users_opp_dist) + len(users_opp_loc)

overal_food_pref_count = count_prefs(food_prefs_4x + food_prefs_opp_dist + food_prefs_opp_loc)
overal_food_pref_prop = {x_key: x_val / total_users for x_key, x_val in overal_food_pref_count.items()}

In [None]:
overal_food_pref_prop

# Data Dashboard
This section of the notebook is used to monitor the data coming in from the study. Some measures we see here may be used within the paper, but the core purpose of this section is for monitoring the study.

In [None]:
def compute_count_prop_byloc(data, location_col, response_col):
    """
    Computes and returns a groupby DataFrame with counts and proportions of responses, by location type.
    
    Input:
        data (DataFrame): must include columns for location_col and response_col to aggregate on.
        location_col (string): location column to aggregate on (e.g. locationType, locationName)
        response_col (string): response to aggregate (e.g. remappedResponses, emaResponse)
    
    Output:
        (groupby DataFrame): aggregated data, by location
    """
    count_byloc = data.groupby([location_col, response_col]).apply(lambda x: pd.Series({'count': x[response_col].count()},
                                                                                       index=['count']))
    prop_byloc = count_byloc.groupby(level=0, as_index=False).apply(lambda x: 100 * x / float(x.sum())).add_suffix('_proportion')

    # combine count and proportion
    combined_byloc = pd.concat([count_byloc, prop_byloc], axis=1)
    
    return combined_byloc

In [None]:
def compute_count_prop_overall(data, response_col):
    """
    Computes and returns a DataFrame with counts and proportions of responses.
    
    Input:
        data (DataFrame): must include column for response_col to aggregate on
        response_col (string): response to aggregate (e.g. remappedResponses, emaResponse)
    
    Output:
        (DataFrame): aggregated data
    """
    combined_overall = data.groupby([response_col])[response_col].count().reset_index(name='count')
    combined_overall['percentage'] = 100.0 * combined_overall['count'] / sum(combined_overall['count'])
    combined_overall.loc[len(combined_overall)] = ['Total', sum(combined_overall['count']), 100.0]
    
    return combined_overall

In [None]:
def compute_valid_count_prop_byuser(data, users, user_col, response_col, valid_responses):
    """
    Computes and returns a groupby DataFrame with counts and proportions of valid responses, by user.
    
    Input:
        data (DataFrame): must include columns for user_col and response_col to aggregate on.
        users (DataFrame): users to include for no responses
        user_col (string): user column to aggregate on (e.g. vendorId)
        response_col (string): response to aggregate (e.g. remappedResponses, emaResponse)
        valid_responses (list of strings): list of valid responses to include in final output
    
    Output:
        (groupby DataFrame): aggregated data when valid response exists, by user
    """
    count_byuser = data.groupby([user_col, response_col]).apply(lambda x: pd.Series({'count': x[response_col].count()},
                                                                                       index=['count']))
    prop_byuser = count_byuser.groupby(level=0, as_index=False).apply(lambda x: 100 * x / float(x.sum())).add_suffix('_proportion')

    # combine count and proportion
    combined_byloc = pd.concat([count_byuser, prop_byuser], axis=1).reset_index()
    
    # isolate only valid responses and return
    combined_byloc = combined_byloc[combined_byloc[response_col].isin(valid_responses)].reset_index(drop=True)
    
    # include all people in dataframe
    combined_byloc = combined_byloc.merge(users[['objectId', 'vendorId']], how='right')
    del combined_byloc['objectId']
    
    # fill blanks
    combined_byloc.fillna(value={
        response_col: 'No Responses',
        'count': 0,
        'count_proportion': 0.0
    }, inplace=True)
    
    return combined_byloc

## 4X: At Location

In [None]:
atloc_4x_count_prop_byloc = compute_count_prop_byloc(atloc_4x, 'locationType', 'remappedResponses')
atloc_4x_count_prop_byloc

In [39]:
atloc_4x_count_prop_overall = compute_count_prop_overall(atloc_4x, 'remappedResponses')
atloc_4x_count_prop_overall

Unnamed: 0,remappedResponses,count,percentage
0,"""I don't know"" Response",60,13.452915
1,Dismissed Notification,8,1.793722
2,Missed Notification,156,34.977578
3,Valid Response,222,49.775785
4,Total,446,100.0


In [40]:
print('Unique number of users notified: {}'.format(len(atloc_4x['vendorId'].unique())))

Unique number of users notified: 30


In [41]:
atloc_4x_valid_responses = ['Valid Response']
atloc_4x_valid_count_prop_byuser = compute_valid_count_prop_byuser(atloc_4x, users_4x, 'vendorId', 'remappedResponses',
                                                                   atloc_4x_valid_responses)
atloc_4x_valid_count_prop_byuser.head()

Unnamed: 0,vendorId,remappedResponses,count,count_proportion
0,0EC5DDE3-5D78-4025-AC41-EF6EEF9612FC,Valid Response,11.0,40.740741
1,163E0EF0-E16E-4D06-9037-69000FF4F199,Valid Response,10.0,58.823529
2,203FE158-68A6-496F-B72E-274A4417983A,Valid Response,2.0,20.0
3,3508DCCF-243B-4B23-AC41-C514EF1671E8,Valid Response,3.0,33.333333
4,3950424A-7986-4771-B867-E4A5624D00ED,Valid Response,23.0,56.097561


## 4X: At Distance (with info)

In [42]:
atdist_4x_count_prop_byloc = compute_count_prop_byloc(atdist_4x, 'locationType', 'emaResponse')
atdist_4x_count_prop_byloc

Unnamed: 0_level_0,Unnamed: 1_level_0,count,count_proportion
locationType,emaResponse,Unnamed: 2_level_1,Unnamed: 3_level_1
coffeeshop,Missed Notification,9,30.0
coffeeshop,No. Other reason.,1,3.333333
coffeeshop,No. This info is useful but I have to be somewhere.,14,46.666667
coffeeshop,No. This info isn't useful to me.,5,16.666667
coffeeshop,Yes. This info is useful but I'm already going there.,1,3.333333
freefood,Missed Notification,8,34.782609
freefood,No. Other reason.,2,8.695652
freefood,No. This info is useful but I have to be somewhere.,5,21.73913
freefood,No. This info isn't useful to me.,4,17.391304
freefood,"Yes! This info is useful, I'm going now.",3,13.043478


In [43]:
atdist_4x_count_prop_overall = compute_count_prop_overall(atdist_4x, 'emaResponse')
atdist_4x_count_prop_overall

Unnamed: 0,emaResponse,count,percentage
0,Dismissed Notification,1,0.653595
1,Missed Notification,60,39.215686
2,No. I don't want to go out of my way there.,1,0.653595
3,No. Other reason.,3,1.960784
4,No. This info is useful but I have to be somew...,54,35.294118
5,No. This info isn't useful to me.,28,18.300654
6,"Yes! This info is useful, I'm going now.",4,2.614379
7,Yes. This info is useful but I'm already going...,2,1.30719
8,Total,153,100.0


In [44]:
print('Unique number of users notified: {}'.format(len(atdist_4x['vendorId'].unique())))

Unique number of users notified: 29


In [45]:
atdist_4x_valid_responses = ['Yes! This info is useful, I\'m going now.']
atdist_4x_valid_count_prop_byuser = compute_valid_count_prop_byuser(atdist_4x, users_4x, 'vendorId', 'emaResponse',
                                                                    atdist_4x_valid_responses)
atdist_4x_valid_count_prop_byuser.head()

Unnamed: 0,vendorId,emaResponse,count,count_proportion
0,3950424A-7986-4771-B867-E4A5624D00ED,"Yes! This info is useful, I'm going now.",1.0,20.0
1,B2CE1B90-BD84-4B33-AE61-435FCBC195BA,"Yes! This info is useful, I'm going now.",1.0,14.285714
2,F869C396-591A-4F19-895B-5E5871924B45,"Yes! This info is useful, I'm going now.",2.0,25.0
3,437A2325-CA4D-4426-9F1A-F89E412A00AC,No Responses,0.0,0.0
4,F57CB6D4-09B3-4796-AE91-2F31BADEE832,No Responses,0.0,0.0


## 4X: En Route

In [46]:
enroute_4x_count_prop_byloc = compute_count_prop_byloc(enroute_4x, 'locationType', 'remappedResponses')
enroute_4x_count_prop_byloc

Unnamed: 0_level_0,Unnamed: 1_level_0,count,count_proportion
locationType,remappedResponses,Unnamed: 2_level_1,Unnamed: 3_level_1
parkingspace,Valid Response,1,100.0


In [47]:
enroute_4x_count_prop_overall = compute_count_prop_overall(enroute_4x, 'remappedResponses')
enroute_4x_count_prop_overall

Unnamed: 0,remappedResponses,count,percentage
0,Valid Response,1,100.0
1,Total,1,100.0


In [48]:
print('Unique number of users notified: {}'.format(len(enroute_4x['vendorId'].unique())))

Unique number of users notified: 1


In [49]:
enroute_4x_valid_responses = ['Valid Response']
enroute_4x_valid_count_prop_byuser = compute_valid_count_prop_byuser(enroute_4x, users_4x, 'vendorId', 'remappedResponses',
                                                                     enroute_4x_valid_responses)
enroute_4x_valid_count_prop_byuser.head()

Unnamed: 0,vendorId,remappedResponses,count,count_proportion
0,F869C396-591A-4F19-895B-5E5871924B45,Valid Response,1.0,100.0
1,437A2325-CA4D-4426-9F1A-F89E412A00AC,No Responses,0.0,0.0
2,F57CB6D4-09B3-4796-AE91-2F31BADEE832,No Responses,0.0,0.0
3,684BC204-B3DE-4078-92C2-3DD5535A340F,No Responses,0.0,0.0
4,E89FDDC1-0ADA-4249-B9AD-40F24FD9DE30,No Responses,0.0,0.0


# Specific Location View 
This section of the notebook is used to monitor the data coming in from the study, for a specific location. For example, we might want to know how a free food location is doing. 

In [66]:
location_id_4x = 'sNfLGcUBAw'
location_id_opp_dist = 'vYBZxNqfKD'
location_id_opp_loc = 'Ajvw3InHvK'

## 4X: Specific Location View

In [67]:
atloc_4x[atloc_4x['taskLocationId'] == location_id_4x]

Unnamed: 0,taskLocationId,vendorId,beaconId,distanceToLocation,locationType,locationName,gmtOffset,notificationTimestamp,notificationString,question,response,remappedResponses,responseTimestamp
70,sNfLGcUBAw,9859FB99-038D-4BB8-B8FB-30470D06B57E,,6.535848,freefood,Ford Ground Floor (by the staircase),-21600,1520637269,"Notified for sNfLGcUBAw (42.057016, -87.676649...",Missed Notification,Missed Notification,Missed Notification,-1
71,sNfLGcUBAw,A0288977-976D-4831-93D4-8472FC842621,,29.364935,freefood,Ford Ground Floor (by the staircase),-21600,1520629698,"Notified for sNfLGcUBAw (42.057016, -87.676649...",stillleft,I don't know,"""I don't know"" Response",1520633926
72,sNfLGcUBAw,61433627-A7DB-4DFC-B135-9D1B0D0ED6D3,,9.015508,freefood,Ford Ground Floor (by the staircase),-21600,1520619193,"Notified for sNfLGcUBAw (42.057016, -87.676649...",foodtype,pizza,Valid Response,1520619756
73,sNfLGcUBAw,F57CB6D4-09B3-4796-AE91-2F31BADEE832,,25.3117,freefood,Ford Ground Floor (by the staircase),-21600,1520619305,"Notified for sNfLGcUBAw (42.057016, -87.676649...",stillleft,yes,Valid Response,1520621592


In [68]:
atdist_4x[atdist_4x['taskLocationId'] == location_id_4x]

Unnamed: 0,taskLocationId,vendorId,beaconId,distanceToLocation,bearingToLocation,locationType,locationName,notificationDistance,sentBy,infoIncluded,gmtOffset,notificationTimestamp,emaResponse,responseTimestamp
50,sNfLGcUBAw,9859FB99-038D-4BB8-B8FB-30470D06B57E,,164.535429,267.223445,freefood,Ford Ground Floor (by the staircase),300,geofence trip,True,-21600,1520639603,Missed Notification,-1
51,sNfLGcUBAw,3950424A-7986-4771-B867-E4A5624D00ED,,300.243505,32.281613,freefood,Ford Ground Floor (by the staircase),300,geofence trip,True,-21600,1520623394,Missed Notification,-1
52,sNfLGcUBAw,4B4115BA-BF8C-4F05-9B6E-7862FA08FC5F,,306.880076,30.076641,freefood,Ford Ground Floor (by the staircase),300,geofence trip,True,-21600,1520627032,Missed Notification,-1
53,sNfLGcUBAw,0EC5DDE3-5D78-4025-AC41-EF6EEF9612FC,,211.462153,42.169554,freefood,Ford Ground Floor (by the staircase),300,geofence trip,True,-21600,1520626305,No. Other reason.,1520626316
54,sNfLGcUBAw,E78771C1-08B3-4483-99D8-4A081E98DABC,,294.210782,102.853852,freefood,Ford Ground Floor (by the staircase),300,geofence trip,True,-21600,1520629076,Missed Notification,-1
55,sNfLGcUBAw,F869C396-591A-4F19-895B-5E5871924B45,,284.439683,6.775773,freefood,Ford Ground Floor (by the staircase),300,geofence trip,True,-21600,1520628912,No. This info is useful but I have to be somew...,1520629601
56,sNfLGcUBAw,95674A2B-6954-49C6-9456-A7F865CE686E,,320.467175,75.285564,freefood,Ford Ground Floor (by the staircase),300,geofence trip,True,-21600,1520639312,No. This info is useful but I have to be somew...,1520639320


# Paper Tables
This section creates the tables that we used for the CHI'18 version of the LES paper. They, for the most part, will be the final tables included in the paper.

In [72]:
def create_study_table(count_prop_df, location_col, response_col, column_dict, response_list):
    """
    Creates the equivalent table found in the paper using a count_prop table.
    
    Input: 
        count_prop_df (DataFrame): DataFrame with locationType, remappedResponses, count, and count_proportion
        location_col (str): column to get locations for columns (e.g. coffeeshops, freefood, etc.)
        response_col (str): column to get responses for rows (e.g. emaResponses, remappedResponses)
        column_dict (dict): columns to include (i.e. locationTypes) in table with label remaps
        response_list (list of str): responses to copy over to new table (e.g. Missed Notif)
    """
    # add additional columns to beginning of table
    columns = ['Response Type', 'Overall']
    columns += column_dict.keys()
    
    # add total row
    response_types = deepcopy(response_list)
    response_types += ['Total']
    
    # create output DataFrame
    output_dict = {'Response Type': response_types}
    output_df = pd.DataFrame(output_dict)
    
    # populate each cell
    for col in columns:
        # ignore first response type column
        if col == 'Response Type':
            continue

        # get counts for each row)
        for row in response_types: 
            if row != 'Total':
                if col == 'Overall':
                    output_df.loc[response_types.index(row), col] = count_prop_df.loc[count_prop_df[response_col] == row, 'count'].sum()
                else:
                    output_df.loc[response_types.index(row), col] = count_prop_df.loc[(count_prop_df[location_col] == col) &
                                                                                      (count_prop_df[response_col] == row), 'count'].sum()

        # get total
        output_df.loc[response_types.index('Total'), col] = output_df[0:-1][col].sum()

        # create proportion and save
        epsilon = 0.00000000001
        col_proportions = (100 * output_df[col] / (float(output_df.loc[response_types.index('Total'), col]) + epsilon)).astype(np.double).round(2)
        output_df[col] = col_proportions.astype(str) + '% (' + output_df[col].astype(np.int).astype(str) + ')'
        
    # reorder columns
    output_df.columns = columns
    
    # remap column names
    output_df.rename(columns=column_dict, inplace=True)
    
    return output_df

In [73]:
location_remapping = {'coffeeshop': 'Coffee Shops', 'freefood': 'Free Food', 'gym': 'Gyms', 'workspace': 'Workspaces'}
atloc_response_list = ['Valid Response', '"I don\'t know" Response',
                        'Dismissed Notification', 'Missed Notification']
atdist_info_response_list = ['Yes! This info is useful, I\'m going now.',
                            'Yes. This info is useful but I\'m already going there.',
                            'No. This info is useful but I have to be somewhere.',
                            'No. This info isn\'t useful to me.',
                            'No. Other reason.',
                            'Dismissed Notification',
                            'Missed Notification']
atdist_noinfo_response_list = ['Sure! I would be happy to go out of my way!',
                               'Sure, but I was going to walk past it anyway.',
                               'No. I don\'t want to go out of my way there.',
                               'No. Other reason.',
                               'Dismissed Notification',
                               'Missed Notification']

## Comparing Response Rates: At Location

### 4X Study Table: At Location (eXplore)

In [74]:
atloc_4x_tabledata = atloc_4x_count_prop_byloc.reset_index()
create_study_table(atloc_4x_tabledata, 'locationType', 'remappedResponses',
                   location_remapping, atloc_response_list)

Unnamed: 0,Response Type,Overall,Coffee Shops,Free Food,Gyms,Workspaces
0,Valid Response,49.78% (222),43.14% (88),41.94% (13),53.76% (50),60.17% (71)
1,"""I don't know"" Response",13.45% (60),17.65% (36),41.94% (13),7.53% (7),3.39% (4)
2,Dismissed Notification,1.79% (8),1.96% (4),3.23% (1),2.15% (2),0.85% (1)
3,Missed Notification,34.98% (156),37.25% (76),12.9% (4),36.56% (34),35.59% (42)
4,Total,100.0% (446),100.0% (204),100.0% (31),100.0% (93),100.0% (118)


### 4X Study Data: At Location (eXploit)

In [75]:
enroute_4x_tabledata = enroute_4x_count_prop_byloc.reset_index()
create_study_table(enroute_4x_tabledata, 'locationType', 'remappedResponses',
                   location_remapping, atloc_response_list)

Unnamed: 0,Response Type,Overall,Coffee Shops,Free Food,Gyms,Workspaces
0,Valid Response,100.0% (1),0.0% (0),0.0% (0),0.0% (0),0.0% (0)
1,"""I don't know"" Response",0.0% (0),0.0% (0),0.0% (0),0.0% (0),0.0% (0)
2,Dismissed Notification,0.0% (0),0.0% (0),0.0% (0),0.0% (0),0.0% (0)
3,Missed Notification,0.0% (0),0.0% (0),0.0% (0),0.0% (0),0.0% (0)
4,Total,100.0% (1),0.0% (0),0.0% (0),0.0% (0),0.0% (0)


## Comparing Response Rates: At Distance

### 4X Study Table: At Distance (eXpand, with info)

In [78]:
atdist_info_4x_tabledata = atdist_4x_count_prop_byloc.reset_index()
create_study_table(atdist_info_4x_tabledata, 'locationType', 'emaResponse',
                   location_remapping, atdist_info_response_list)

Unnamed: 0,Response Type,Overall,Coffee Shops,Free Food,Gyms,Workspaces
0,"Yes! This info is useful, I'm going now.",2.63% (4),0.0% (0),13.04% (3),1.56% (1),0.0% (0)
1,Yes. This info is useful but I'm already going...,1.32% (2),3.33% (1),4.35% (1),0.0% (0),0.0% (0)
2,No. This info is useful but I have to be somew...,35.53% (54),46.67% (14),21.74% (5),31.25% (20),42.86% (15)
3,No. This info isn't useful to me.,18.42% (28),16.67% (5),17.39% (4),21.87% (14),14.29% (5)
4,No. Other reason.,1.97% (3),3.33% (1),8.7% (2),0.0% (0),0.0% (0)
5,Dismissed Notification,0.66% (1),0.0% (0),0.0% (0),0.0% (0),2.86% (1)
6,Missed Notification,39.47% (60),30.0% (9),34.78% (8),45.31% (29),40.0% (14)
7,Total,100.0% (152),100.0% (30),100.0% (23),100.0% (64),100.0% (35)
