# 4X Study Dashboard
This notebook is used to analyze data from the 4X study conducted in March 2018.

# Imports and Global Setup

In [1]:
# data processing
import math
import json
import itertools
from multiprocessing import Pool, cpu_count
from multiprocessing.dummy import Pool as ThreadPool 
from functools import reduce
from collections import Counter

import requests
import os
from pymongo import MongoClient
client = MongoClient(os.environ['MONGO_URI'])
db = client['les-4x-su2018']

import pandas as pd
import numpy as np
from scipy import stats

from datetime import datetime, timezone
from copy import deepcopy
from tqdm import tqdm_notebook as tqdm

In [2]:
# google
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from operator import itemgetter

In [3]:
# google maps
import os
import gmaps
import gmaps.datasets

gmaps.configure(api_key=os.environ['GMAPS'])

In [4]:
# plotting
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline

In [5]:
# palette
sns.set(font_scale=1.5, style='whitegrid')
# sns.set_palette("cubehelix")
sns.set_palette(sns.cubehelix_palette(rot=-.4))

# Load in data

In [6]:
# URLs for different conditions
url = 'https://les-expand.herokuapp.com/parse/classes/'

# shared header and data
header = {'X-Parse-Application-Id': 'PkngqKtJygU9WiQ1GXM9eC0a17tKmioKKmpWftYr'}
data = {'limit': '10000'}

# study start and end
start_time = '2018-08-27 05:00:00'
end_time = '2018-09-10 05:00:00'
print('Study Length: {}'.format(datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S') - datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')))

Study Length: 14 days, 0:00:00


In [7]:
def load_data(base_url, header, data, start_time, end_time):
    """
    Loads in all needed tables from database, given url.
    
    Input: 
        base_url (string): url to pull data from
        header (dict): application id and other auth
        data (dict): data to pass into query
        start_time (datetime): start time for data
        end_time (datetime): end time for data 
    
    Return:
        (dict): dict where keys are collection names and values are Pandas objects containing data
    """
    # declare collection list
    collection_list = ['_User', 'ServerLog', 'DebugLog', 'ForYouViewLog', 'ApplicationHeartbeat',
                       'TaskLocations', 'LocationTypeMetadata', 'beacons', 'EnRouteLocations',
                       'AtLocationNotificationsSent', 'AtLocationNotificationResponses',
                       'EnRouteNotificationsSent', 'EnRouteNotificationResponses',
                       'AtDistanceNotificationsSent', 'AtDistanceNotificationResponses']
    
    # loop through and load data for each collection
    output = {}
    for collection in tqdm(collection_list):
        current_response = requests.get(base_url + collection, headers=header, data=data)

        current_data = pd.DataFrame(current_response.json()['results'])
        if len(current_data) != 0 and collection not in ['LocationTypeMetadata', 'EnRouteLocations']:
            current_data['createdAt'] = pd.to_datetime(current_data['createdAt'])
            current_data['updatedAt'] = pd.to_datetime(current_data['updatedAt'])
            
            if collection != '_User':
                current_data = current_data[(current_data['createdAt'] >= start_time) & (current_data['createdAt'] < end_time)]

        output[collection] = current_data
    
    return output

def load_data_parallel(url):
    return load_data(url, header, data, start_time, end_time)

In [8]:
# fetch log data
raw_data = load_data(url, header, data, start_time, end_time)

HBox(children=(IntProgress(value=0, max=15), HTML(value='')))




## Location Data Directly from MongoDB

In [9]:
location_updates = pd.DataFrame(list(db['LocationUpdates'].find({})))

# Data Setup
This section of the notebook is used to monitor the data coming in from the study. Some measures we see here may be used within the paper

## Common Functions and Data

In [10]:
def get_merged_at_location(tasklocations, atlocnotif, atlocresp):
    """
    Sets up a Pandas DataFrame with (1) TaskLocation, (2) NotificationSent, and (3) NotificationResponse data
    merged together for AtLocation case.
    
    Input:
        tasklocations (DataFrame): DataFrame of TaskLocations
        atlocnotif (DataFrame): DataFrame of AtLocationNotificationsSent
        atlocresp (DataFrame): DataFrame of AtLocationNotificationResponses
    
    Return:
        (DataFrame): merged DataFrame of inputs
    """
    # get AtLocationNotifications without duplicates
    atlocnotif.drop_duplicates(subset=['taskLocationId', 'vendorId'], keep='last', inplace=True)
    atlocnotif.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)

    # get AtLocationNotificationResponses without duplicates
    atlocresp.drop_duplicates(subset=['taskLocationId', 'vendorId'], keep='last', inplace=True)
    atlocresp.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)
    
    # combine AtLocation notifications and responses, with some data from TaskLocations
    atloc = atlocnotif.merge(tasklocations[['objectId', 'locationType', 'locationName', 'beaconId']],
                             how='inner', left_on='taskLocationId', right_on='objectId')
    atloc = atloc.merge(atlocresp[['question', 'response', 'timestamp', 'taskLocationId', 'vendorId']],
                        how='left', on=['taskLocationId', 'vendorId'])
    
    # clean columns
    del atloc['objectId']
    atloc.rename(columns={'timestamp_x': 'notificationTimestamp', 'timestamp_y': 'responseTimestamp'},
                 inplace=True)
    
    # fill blank columns
    atloc[['question', 'response']] = atloc[['question', 'response']].fillna(value='Missed Notification')
    atloc[['distanceToLocation', 'responseTimestamp']] = atloc[['distanceToLocation', 'responseTimestamp']].fillna(value=-1)

    # type columns
    atloc_int_cols = ['gmtOffset','notificationTimestamp', 'responseTimestamp']
    atloc[atloc_int_cols] = atloc[atloc_int_cols].apply(lambda x: x.astype(np.int64))
    
    # add remappedResponses column 
    invalid_responses = ['I don\'t know', 'com.apple.UNNotificationDismissActionIdentifier', 'Missed Notification']
    atloc['remappedResponses'] = atloc['response']
    atloc.loc[~atloc['remappedResponses'].isin(invalid_responses), 'remappedResponses'] = 'Valid Response'
    atloc.loc[atloc['remappedResponses'] == 'com.apple.UNNotificationDismissActionIdentifier', 'remappedResponses'] = 'Dismissed Notification'
    atloc.loc[atloc['remappedResponses'] == 'I don\'t know', 'remappedResponses'] = '"I don\'t know" Response'
    
    # reorder columns
    atloc_col_ordering = ['taskLocationId', 'vendorId', 'beaconId', 'distanceToLocation',
                          'locationType', 'locationName','gmtOffset', 'notificationTimestamp', 'notificationString',
                          'question', 'response', 'remappedResponses', 'responseTimestamp']
    atloc = atloc[atloc_col_ordering]
    
    return atloc

In [11]:
def get_merged_at_distance(tasklocations, atdistnotif, atdistresp):
    """
    Sets up a Pandas DataFrame with (1) TaskLocation, (2) NotificationSent, and (3) NotificationResponse data
    merged together for AtDistance case.
    
    Input:
        tasklocations (DataFrame): DataFrame of TaskLocations
        atdistnotif (DataFrame): DataFrame of AtDistanceNotificationsSent
        atdistresp (DataFrame): DataFrame of AtDistanceNotificationResponses
    
    Return:
        (DataFrame): merged DataFrame of inputs
    """
    # get AtDistanceNotifications without duplicates
    atdistnotif.drop_duplicates(subset=['taskLocationId', 'vendorId'], keep='last', inplace=True)
    atdistnotif.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)

    # get AtDistanceNotificationResponses without duplicates
    atdistresp.drop_duplicates(subset=['taskLocationId', 'vendorId'], keep='last', inplace=True)
    atdistresp.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)
    
    # combine AtDistance notifications and responses, with some data from TaskLocations
    atdist = atdistnotif.merge(tasklocations[['objectId', 'beaconId', 'locationName']],
                               how='inner', left_on='taskLocationId', right_on='objectId')
    atdist = atdist.merge(atdistresp[['emaResponse', 'timestamp', 'taskLocationId', 'vendorId']],
                          how='left', on=['taskLocationId', 'vendorId'])
    
    # clean columns
    del atdist['objectId']
    atdist.rename(columns={'timestamp_x': 'notificationTimestamp', 'timestamp_y': 'responseTimestamp'}, inplace=True)

    atdist_col_ordering = ['taskLocationId', 'vendorId', 'beaconId', 'distanceToLocation', 'bearingToLocation',
                           'locationType', 'locationName', 'notificationDistance', 'sentBy', 'infoIncluded',
                           'gmtOffset', 'notificationTimestamp', 'emaResponse', 'responseTimestamp']
    atdist = atdist[atdist_col_ordering]
    
    # fill blank columns
    atdist['emaResponse'] = atdist['emaResponse'].fillna(value='Missed Notification')
    atdist['responseTimestamp'] = atdist['responseTimestamp'].fillna(value=-1)
    
    # remap columns
    atdist.loc[atdist['emaResponse'] == 'com.apple.UNNotificationDismissActionIdentifier', 'emaResponse'] = 'Dismissed Notification'

    # type columns
    atdist_int_cols = ['gmtOffset','notificationTimestamp', 'responseTimestamp']
    atdist[atdist_int_cols] = atdist[atdist_int_cols].apply(lambda x: x.astype(np.int64))
    
    return atdist

In [12]:
def get_merged_en_route(enroutelocations, enroutenotif, enrouteresp):
    """
    Sets up a Pandas DataFrame with (1) EnRouteLocations, (2) NotificationSent, and (3) NotificationResponse data
    merged together for EnRoute case.
    
    Input:
        enroutelocations (DataFrame): DataFrame of EnRouteLocations
        enroutenotif (DataFrame): DataFrame of EnRouteNotificationsSent
        enrouteresp (DataFrame): DataFrame of EnRouteNotificationResponses
    
    Return:
        (DataFrame): merged DataFrame of inputs
    """
    # get EnRouteNotifications without duplicates
    enroutenotif.drop_duplicates(subset=['enRouteLocationId', 'vendorId'], keep='last', inplace=True)
    enroutenotif.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)

    # get AtDistanceNotificationResponses without duplicates
    enrouteresp.drop_duplicates(subset=['enRouteLocationId', 'vendorId'], keep='last', inplace=True)
    enrouteresp.drop(['objectId', 'createdAt', 'updatedAt'], axis=1, inplace=True)
    
    # combine EnRouteNotifications and responses, with some data from EnRouteLocations
    enroute = enroutenotif.merge(enroutelocations[['objectId', 'locationName', 'locationType']],
                               how='inner', left_on='enRouteLocationId', right_on='objectId')
    enroute = enroute.merge(enrouteresp[['questionResponse', 'timestamp', 'enRouteLocationId', 'vendorId']],
                            how='left', on=['enRouteLocationId', 'vendorId'])
    
    # clean columns
    del enroute['objectId']
    enroute.rename(columns={'timestamp_x': 'notificationTimestamp', 'timestamp_y': 'responseTimestamp'}, inplace=True)

    enroute_col_ordering = ['enRouteLocationId', 'vendorId', 'distanceToLocation', 'locationType', 'locationName',
                           'gmtOffset', 'notificationTimestamp', 'questionResponse', 'responseTimestamp']
    enroute = enroute[enroute_col_ordering]
    
    # fill blank columns
    enroute['questionResponse'] = enroute['questionResponse'].fillna(value='Missed Notification')
    enroute.loc[enroute['questionResponse'] == 'com.apple.UNNotificationDismissActionIdentifier', 'questionResponse'] = 'Dismissed Notification'
    
    enroute['responseTimestamp'] = enroute['responseTimestamp'].fillna(value=-1)
    
     # add validResponse column 
    invalid_responses = ['I don\'t know', 'com.apple.UNNotificationDismissActionIdentifier', 'Missed Notification']
    enroute['remappedResponses'] = enroute['questionResponse']
    enroute.loc[~enroute['remappedResponses'].isin(invalid_responses), 'remappedResponses'] = 'Valid Response'
    enroute.loc[enroute['remappedResponses'] == 'I don\'t know', 'remappedResponses'] = '"I don\'t know" Response'

    # type columns
    enroute_int_cols = ['gmtOffset','notificationTimestamp', 'responseTimestamp']
    enroute[enroute_int_cols] = enroute[enroute_int_cols].apply(lambda x: x.astype(np.int64))
    
    return enroute

In [13]:
def get_dead_apps(serverlog):
    """
    Returns a list of lists for dead apps that server has notified.
    
    Input: 
        server (DataFrame): DataFrame of ServerLog
    
    Return:
        (list of lists of strings): all dead applications notified via push
    """
    notify_log_strings = serverlog[serverlog['logString'].str.contains('Notified dead')]['logString']
    deadapp_notif_list = list(notify_log_strings.apply(lambda x: x[x.find('[') + 1:-1].split(', ')))
    return deadapp_notif_list

In [14]:
location_scaffolds = {}
for index, row in raw_data['LocationTypeMetadata'].iterrows():
    location_scaffolds[row['locationType']] = row['scaffold']

## Data Setup

In [15]:
serverlog = deepcopy(raw_data['ServerLog'])
deadapp_notify = get_dead_apps(serverlog)
print('Last dead apps notified (count = {}): \n{}'.format(len(deadapp_notify[-1]),
                                                          '\n'.join(deadapp_notify[-1])))

flattened_deadapps = reduce(lambda x, y: x + y, deadapp_notify, [])
Counter(flattened_deadapps)

Last dead apps notified (count = 4): 
0DD045E1-0166-461B-B8CF-EA57DBFB5AAD
8E710E79-A370-42B9-9D2C-ECA8623C1F3B
32019C1D-6090-44C7-8746-E2E3C5C79229
EE6F6004-D0FF-4AC3-AC8E-0A9125982854


Counter({'0DD045E1-0166-461B-B8CF-EA57DBFB5AAD': 3,
         '8E710E79-A370-42B9-9D2C-ECA8623C1F3B': 3,
         '52CF47E5-C543-45F3-9CA6-23308C2B1E57': 1,
         '32019C1D-6090-44C7-8746-E2E3C5C79229': 1,
         'EE6F6004-D0FF-4AC3-AC8E-0A9125982854': 1})

In [16]:
users = deepcopy(raw_data['_User'])
users = users[users['vendorId'] != '']

tasklocations = deepcopy(raw_data['TaskLocations'])
enroutelocations = deepcopy(raw_data['EnRouteLocations'])

atlocnotif = deepcopy(raw_data['AtLocationNotificationsSent'])
atlocresp = deepcopy(raw_data['AtLocationNotificationResponses'])

atdistnotif = deepcopy(raw_data['AtDistanceNotificationsSent'])
atdistresp = deepcopy(raw_data['AtDistanceNotificationResponses'])

enroutenotif = deepcopy(raw_data['EnRouteNotificationsSent'])
enrouteresp = deepcopy(raw_data['EnRouteNotificationResponses'])

foryou = deepcopy(raw_data['ForYouViewLog'])

print('4X | User Count: {}'.format(len(users)))
print('4X | At location notifications: {}, At location responses: {}'.format(len(atlocnotif), len(atlocresp)))
print('4X | At distance notifications: {}, At distance responses: {}'.format(len(atdistnotif), len(atdistresp)))
print('4X | En route notifications: {}, En route responses: {}'.format(len(enroutenotif), len(enrouteresp)))

4X | User Count: 20
4X | At location notifications: 345, At location responses: 107
4X | At distance notifications: 170, At distance responses: 83
4X | En route notifications: 391, En route responses: 27


In [17]:
# exclude kapil and rob
user_exclude_ids = [
    '20E1994C-9296-466F-B8FB-B5804C1C2121', # kapil
    '88991A9A-2302-4359-B8AE-4E2F2505E6AE', # rob
    '' # random blank id
]

users = users[~users['vendorId'].isin(user_exclude_ids)]

atlocnotif = atlocnotif[~atlocnotif['vendorId'].isin(user_exclude_ids)]
atlocresp = atlocresp[~atlocresp['vendorId'].isin(user_exclude_ids)]

atdistnotif = atdistnotif[~atdistnotif['vendorId'].isin(user_exclude_ids)]
atdistresp = atdistresp[~atdistresp['vendorId'].isin(user_exclude_ids)]

enroutenotif = enroutenotif[~enroutenotif['vendorId'].isin(user_exclude_ids)]
enrouteresp = enrouteresp[~enrouteresp['vendorId'].isin(user_exclude_ids)]

foryou = foryou[~foryou['vendorId'].isin(user_exclude_ids)]

location_updates = location_updates[~location_updates['vendorId'].isin(user_exclude_ids)]

print('4X | User Count: {}'.format(len(users)))
print('4X | At location notifications: {}, At location responses: {}'.format(len(atlocnotif), len(atlocresp)))
print('4X | At distance notifications: {}, At distance responses: {}'.format(len(atdistnotif), len(atdistresp)))
print('4X | En route notifications: {}, En route responses: {}'.format(len(enroutenotif), len(enrouteresp)))

4X | User Count: 18
4X | At location notifications: 228, At location responses: 104
4X | At distance notifications: 140, At distance responses: 81
4X | En route notifications: 391, En route responses: 27


### Additional Setup for LocationUpdates

In [18]:
location_updates['location'] = location_updates.apply(lambda x: (x['latitude'], x['longitude']), axis=1)
location_updates.rename(columns={'_created_at': 'createdAt', '_id': 'objectId', '_updated_at': 'updatedAt'}, inplace=True)

In [19]:
# merged data frame with all AtLocation data
atloc = get_merged_at_location(deepcopy(tasklocations),
                                  deepcopy(atlocnotif),
                                  deepcopy(atlocresp))

In [20]:
# merged data frame with all AtDistance data
atdist = get_merged_at_distance(deepcopy(tasklocations),
                                   deepcopy(atdistnotif),
                                   deepcopy(atdistresp))
atdist = atdist[atdist['infoIncluded'] == True] # 4X Only: remove cases without info

In [21]:
# eXpand providing more data at location overall
atdist_overall_moreinfo = atdist[atdist['emaResponse'].isin(["Yes! This info is useful. I'm going to go there.", "Yes. This info is useful but I'm already going there."])]
atdist_overall_moreinfo = atdist_overall_moreinfo.merge(atloc, how='left', on=['vendorId', 'taskLocationId'])
atdist_overall_moreinfo['remappedResponses'].fillna(value='Did Not Go', inplace=True)
atdist_overall_moreinfo

Unnamed: 0,taskLocationId,vendorId,beaconId_x,distanceToLocation_x,bearingToLocation,locationType_x,locationName_x,notificationDistance,sentBy,infoIncluded,...,distanceToLocation_y,locationType_y,locationName_y,gmtOffset_y,notificationTimestamp_y,notificationString,question,response,remappedResponses,responseTimestamp_y
0,IeRHOjPCB0,AB80C727-EACF-437C-8460-735DEB25361E,,87.094625,201.42926,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,27.368637,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535570000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535570000.0
1,IeRHOjPCB0,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.05708,239.268627,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,19.360045,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535572000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",specificfood,ice cream,Valid Response,1535572000.0
2,qEkg5qqzH6,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.375498,300.020887,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,28.646216,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535660000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535660000.0
3,qEkg5qqzH6,E2DAC389-DD64-4AF8-934A-6D1EF7D68507,,85.939565,282.469459,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,22.316348,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535665000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",typefood,vanilla,Valid Response,1535665000.0
4,qEkg5qqzH6,AB80C727-EACF-437C-8460-735DEB25361E,,85.210044,95.501341,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,14.698409,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535662000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",specificfood,ice cream,Valid Response,1535663000.0
5,tryP91lXSi,9A9310F7-B69A-414F-803D-CF2153014C25,,165.946711,15.722026,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,,,,,,,,,Did Not Go,
6,N3Y1IB2Oo3,D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3,rIzJ4nI1lp,270.57026,284.280614,gym,SPAC,300,location updates,True,...,,,,,,,,,Did Not Go,
7,N3Y1IB2Oo3,537DF5B3-48B8-4C86-A39F-5DE5DFEE19EC,rIzJ4nI1lp,262.321176,297.218504,gym,SPAC,300,location updates,True,...,-1.0,gym,SPAC,-18000.0,1536185000.0,Notified for beacon region rIzJ4nI1lp,freeweights,yes,Valid Response,1536189000.0
8,rDZmVyHjYQ,F262544F-8C00-4362-87B2-AF8FEE66DB4D,,319.738739,296.913106,freefood,Tech Ground Floor (outside LR3 classroom),300,geofence trip,True,...,26.864289,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1536175000.0,"Notified for rDZmVyHjYQ (42.057569, -87.676077...",typefood,I don't know,"""I don't know"" Response",1536176000.0
9,rDZmVyHjYQ,600C7D5C-EF49-4C95-B0D1-135DBBE0BE5C,,79.92116,24.705004,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,16.648914,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1536173000.0,"Notified for rDZmVyHjYQ (42.057569, -87.676077...",Missed Notification,Missed Notification,Missed Notification,-1.0


In [22]:
atdist_overall_moreinfo['time_diff_seconds'] = (atdist_overall_moreinfo['notificationTimestamp_y'].fillna(0) - atdist_overall_moreinfo['responseTimestamp_x']).astype(int)
atdist_overall_moreinfo['time_diff_minutes'] = atdist_overall_moreinfo['time_diff_seconds'] / 60
atdist_overall_moreinfo

Unnamed: 0,taskLocationId,vendorId,beaconId_x,distanceToLocation_x,bearingToLocation,locationType_x,locationName_x,notificationDistance,sentBy,infoIncluded,...,locationName_y,gmtOffset_y,notificationTimestamp_y,notificationString,question,response,remappedResponses,responseTimestamp_y,time_diff_seconds,time_diff_minutes
0,IeRHOjPCB0,AB80C727-EACF-437C-8460-735DEB25361E,,87.094625,201.42926,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1535570000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535570000.0,135,2.25
1,IeRHOjPCB0,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.05708,239.268627,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1535572000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",specificfood,ice cream,Valid Response,1535572000.0,71,1.183333
2,qEkg5qqzH6,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.375498,300.020887,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1535660000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535660000.0,93,1.55
3,qEkg5qqzH6,E2DAC389-DD64-4AF8-934A-6D1EF7D68507,,85.939565,282.469459,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1535665000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",typefood,vanilla,Valid Response,1535665000.0,214,3.566667
4,qEkg5qqzH6,AB80C727-EACF-437C-8460-735DEB25361E,,85.210044,95.501341,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1535662000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",specificfood,ice cream,Valid Response,1535663000.0,2164,36.06667
5,tryP91lXSi,9A9310F7-B69A-414F-803D-CF2153014C25,,165.946711,15.722026,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,,,,,,,Did Not Go,,-1536340883,-25605680.0
6,N3Y1IB2Oo3,D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3,rIzJ4nI1lp,270.57026,284.280614,gym,SPAC,300,location updates,True,...,,,,,,,Did Not Go,,-1536183022,-25603050.0
7,N3Y1IB2Oo3,537DF5B3-48B8-4C86-A39F-5DE5DFEE19EC,rIzJ4nI1lp,262.321176,297.218504,gym,SPAC,300,location updates,True,...,SPAC,-18000.0,1536185000.0,Notified for beacon region rIzJ4nI1lp,freeweights,yes,Valid Response,1536189000.0,557,9.283333
8,rDZmVyHjYQ,F262544F-8C00-4362-87B2-AF8FEE66DB4D,,319.738739,296.913106,freefood,Tech Ground Floor (outside LR3 classroom),300,geofence trip,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1536175000.0,"Notified for rDZmVyHjYQ (42.057569, -87.676077...",typefood,I don't know,"""I don't know"" Response",1536176000.0,1304,21.73333
9,rDZmVyHjYQ,600C7D5C-EF49-4C95-B0D1-135DBBE0BE5C,,79.92116,24.705004,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,Tech Ground Floor (outside LR3 classroom),-18000.0,1536173000.0,"Notified for rDZmVyHjYQ (42.057569, -87.676077...",Missed Notification,Missed Notification,Missed Notification,-1.0,113,1.883333


In [23]:
atdist_overall_moreinfo.groupby(['emaResponse', 'remappedResponses'])['remappedResponses'].count().reset_index(name='count')

Unnamed: 0,emaResponse,remappedResponses,count
0,Yes! This info is useful. I'm going to go there.,"""I don't know"" Response",2
1,Yes! This info is useful. I'm going to go there.,Did Not Go,5
2,Yes! This info is useful. I'm going to go there.,Missed Notification,1
3,Yes! This info is useful. I'm going to go there.,Valid Response,13
4,Yes. This info is useful but I'm already going...,Did Not Go,2
5,Yes. This info is useful but I'm already going...,Valid Response,1


In [24]:
# eXpand providing more data at location if deviating
atdist_deviate_moreinfo = atdist[atdist['emaResponse'].isin(["Yes! This info is useful. I'm going to go there."])]
atdist_deviate_moreinfo = atdist_deviate_moreinfo.merge(atloc, how='left', on=['vendorId', 'taskLocationId'])
atdist_deviate_moreinfo['remappedResponses'].fillna(value='Did Not Go', inplace=True)
atdist_deviate_moreinfo

Unnamed: 0,taskLocationId,vendorId,beaconId_x,distanceToLocation_x,bearingToLocation,locationType_x,locationName_x,notificationDistance,sentBy,infoIncluded,...,distanceToLocation_y,locationType_y,locationName_y,gmtOffset_y,notificationTimestamp_y,notificationString,question,response,remappedResponses,responseTimestamp_y
0,IeRHOjPCB0,AB80C727-EACF-437C-8460-735DEB25361E,,87.094625,201.42926,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,27.368637,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535570000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535570000.0
1,IeRHOjPCB0,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.05708,239.268627,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,19.360045,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535572000.0,"Notified for IeRHOjPCB0 (42.057569, -87.676077...",specificfood,ice cream,Valid Response,1535572000.0
2,qEkg5qqzH6,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,,86.375498,300.020887,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,28.646216,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535660000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",kindfood,sweet,Valid Response,1535660000.0
3,qEkg5qqzH6,E2DAC389-DD64-4AF8-934A-6D1EF7D68507,,85.939565,282.469459,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,22.316348,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535665000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",typefood,vanilla,Valid Response,1535665000.0
4,qEkg5qqzH6,AB80C727-EACF-437C-8460-735DEB25361E,,85.210044,95.501341,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,14.698409,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1535662000.0,"Notified for qEkg5qqzH6 (42.057569, -87.676077...",specificfood,ice cream,Valid Response,1535663000.0
5,tryP91lXSi,9A9310F7-B69A-414F-803D-CF2153014C25,,165.946711,15.722026,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,,,,,,,,,Did Not Go,
6,N3Y1IB2Oo3,D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3,rIzJ4nI1lp,270.57026,284.280614,gym,SPAC,300,location updates,True,...,,,,,,,,,Did Not Go,
7,N3Y1IB2Oo3,537DF5B3-48B8-4C86-A39F-5DE5DFEE19EC,rIzJ4nI1lp,262.321176,297.218504,gym,SPAC,300,location updates,True,...,-1.0,gym,SPAC,-18000.0,1536185000.0,Notified for beacon region rIzJ4nI1lp,freeweights,yes,Valid Response,1536189000.0
8,rDZmVyHjYQ,F262544F-8C00-4362-87B2-AF8FEE66DB4D,,319.738739,296.913106,freefood,Tech Ground Floor (outside LR3 classroom),300,geofence trip,True,...,26.864289,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1536175000.0,"Notified for rDZmVyHjYQ (42.057569, -87.676077...",typefood,I don't know,"""I don't know"" Response",1536176000.0
9,rDZmVyHjYQ,600C7D5C-EF49-4C95-B0D1-135DBBE0BE5C,,79.92116,24.705004,freefood,Tech Ground Floor (outside LR3 classroom),300,location updates,True,...,16.648914,freefood,Tech Ground Floor (outside LR3 classroom),-18000.0,1536173000.0,"Notified for rDZmVyHjYQ (42.057569, -87.676077...",Missed Notification,Missed Notification,Missed Notification,-1.0


In [25]:
# merged data from with all EnRoute data
enroute = get_merged_en_route(deepcopy(enroutelocations),
                                 deepcopy(enroutenotif),
                                 deepcopy(enrouteresp))
enroute

Unnamed: 0,enRouteLocationId,vendorId,distanceToLocation,locationType,locationName,gmtOffset,notificationTimestamp,questionResponse,responseTimestamp,remappedResponses
0,Ner3yTGWzJ,537DF5B3-48B8-4C86-A39F-5DE5DFEE19EC,27.669354,bikerack,SPAC Bike Rack,-18000,1536184496,no,1536184576,Valid Response
1,Ner3yTGWzJ,25FF4B97-71BF-4BB3-A701-A6937D8DDF9A,25.174523,bikerack,SPAC Bike Rack,-18000,1536100149,yes,1535581234,Valid Response
2,Ner3yTGWzJ,600C7D5C-EF49-4C95-B0D1-135DBBE0BE5C,9.550274,bikerack,SPAC Bike Rack,-18000,1535570797,yes,1535570713,Valid Response
3,tnffEhyqJZ,9A9310F7-B69A-414F-803D-CF2153014C25,18.059207,bikerack,Tech Rear Bike Rack,-18000,1536340891,yes,1536340893,Valid Response
4,tnffEhyqJZ,537DF5B3-48B8-4C86-A39F-5DE5DFEE19EC,9.689656,bikerack,Tech Rear Bike Rack,-18000,1536184238,I don't know,1536184270,"""I don't know"" Response"
5,tnffEhyqJZ,D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3,16.263682,bikerack,Tech Rear Bike Rack,-18000,1535411273,yes,1535411527,Valid Response
6,tnffEhyqJZ,F262544F-8C00-4362-87B2-AF8FEE66DB4D,17.110284,bikerack,Tech Rear Bike Rack,-18000,1536174305,no,1536174467,Valid Response
7,bDFfkSV5PZ,5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,28.201452,bikerack,Ford Bike Rack,-18000,1535571974,yes,1535571980,Valid Response
8,bDFfkSV5PZ,E2DAC389-DD64-4AF8-934A-6D1EF7D68507,25.949504,bikerack,Ford Bike Rack,-18000,1535664397,yes,1535664361,Valid Response
9,bDFfkSV5PZ,5C2E50F3-D8D3-4D79-AF2C-B63360D11E5A,12.741262,bikerack,Ford Bike Rack,-18000,1536085028,yes,1536085024,Valid Response


# Current User Locations

In [26]:
def get_last_update_for_user(all_location_updates, target_vendor_id):
    """
    Gets the last location update for a user, given a vendor id.
    
    Input:
        all_location_updates (DataFrame): all location updatesm sorted in descending order
        target_vendor_id (string): vendor id to get data for
    
    Output:
        (dict): last location update for user
    """
    current_user_updates = all_location_updates[all_location_updates['vendorId'] == target_vendor_id]
    if len(current_user_updates) > 0:
        return dict(all_location_updates[all_location_updates['vendorId'] == target_vendor_id].iloc[0])
    
    return {
        'latitude': 0,
        'longitude': 0,
        'timestamp': 0,
        'gmtOffset': 0
    }

In [27]:
location_updates

Unnamed: 0,createdAt,objectId,updatedAt,gmtOffset,heading,horizontalAccuracy,latitude,longitude,speed,timestamp,vendorId,location
0,2018-09-02 20:09:33.576,000R8GG5Sh,2018-09-02 20:09:33.576,-18000,-1.000000,65.000000,41.865819,-87.624089,-1.000000,1535918973,E2DAC389-DD64-4AF8-934A-6D1EF7D68507,"(41.8658190023699, -87.62408932473174)"
1,2018-09-01 15:14:24.727,000YxGvkCM,2018-09-01 15:14:24.727,-18000,-1.000000,65.000000,42.005785,-87.662617,-1.000000,1535814864,FDFE420C-04ED-47D6-AADF-8909DD559659,"(42.00578491238651, -87.66261653955337)"
2,2018-09-05 18:30:31.112,001YJmG9EE,2018-09-05 18:30:31.112,-18000,-1.000000,65.000000,42.047110,-87.687445,-1.000000,1536171974,5C2E50F3-D8D3-4D79-AF2C-B63360D11E5A,"(42.047109588925466, -87.68744509156218)"
3,2018-09-06 23:26:34.425,001YzOWFLX,2018-09-06 23:26:34.425,-18000,-1.000000,65.000000,42.032976,-87.686532,-1.000000,1536276393,EB73A542-9446-4AFC-BC03-09FA3DA71CEA,"(42.03297597488152, -87.68653201005708)"
4,2018-09-08 01:48:22.200,0026iMQ1Sh,2018-09-08 01:48:22.200,-14400,-1.000000,2000.000000,40.219814,-86.603860,-1.000000,1536371301,C5C548C5-3329-4DD5-9A08-C2937820E0B6,"(40.219813635696376, -86.60386003121074)"
5,2018-09-02 18:00:48.356,002GeKdEDL,2018-09-02 18:00:48.356,-18000,-1.000000,65.000000,42.047710,-87.686089,-1.000000,1535911247,FDFE420C-04ED-47D6-AADF-8909DD559659,"(42.04771036635469, -87.68608939011841)"
7,2018-08-30 20:47:13.803,003IHCl8W8,2018-08-30 20:47:13.803,-18000,319.218750,30.000000,41.891229,-87.636885,5.150000,1535662024,FDFE420C-04ED-47D6-AADF-8909DD559659,"(41.89122876621959, -87.63688450687344)"
8,2018-09-03 05:30:54.223,004OprOQxg,2018-09-03 05:30:54.223,-18000,-1.000000,65.000000,41.771092,-88.167378,-1.000000,1535952654,9A9310F7-B69A-414F-803D-CF2153014C25,"(41.7710918563173, -88.16737764497141)"
9,2018-09-09 18:44:53.427,004l1M1l43,2018-09-09 18:44:53.427,-18000,-1.000000,65.000000,42.086902,-87.849691,-1.000000,1536518692,FDFE420C-04ED-47D6-AADF-8909DD559659,"(42.08690230386105, -87.84969115269115)"
11,2018-09-20 04:08:14.642,005c4sS9Bt,2018-09-20 04:08:14.642,-18000,-1.000000,65.000000,30.566469,-97.614986,-1.000000,1537416494,5C2E50F3-D8D3-4D79-AF2C-B63360D11E5A,"(30.56646927209444, -97.61498578570269)"


In [28]:
# sort and save location updates
location_updates.sort_values('createdAt', ascending=False, inplace=True)

In [29]:
# get last known location for each user
user_last_locations = []

for current_vendor_id in list(users['vendorId'].unique()):
    last_location_update = get_last_update_for_user(location_updates, current_vendor_id)
    output_dict = {
        'vendorId': current_vendor_id,
        'location': (last_location_update['latitude'], last_location_update['longitude']),
        'local_timestamp': datetime.fromtimestamp(last_location_update['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
    }
    
    user_last_locations.append(output_dict)
    
user_last_locations_df = pd.DataFrame(user_last_locations)
user_last_locations_df = user_last_locations_df.merge(users[['vendorId', 'firstName', 'lastName']])
user_last_locations_df['name'] = user_last_locations_df['firstName'] + ' ' + user_last_locations_df['lastName']
user_last_locations_df.sort_values('local_timestamp').reset_index(drop=True)

Unnamed: 0,local_timestamp,location,vendorId,firstName,lastName,name
0,2018-09-05 02:41:18,"(42.00914581237712, -87.66418086403537)",52CF47E5-C543-45F3-9CA6-23308C2B1E57,Garrett,Hedman,Garrett Hedman
1,2018-09-05 22:53:29,"(42.03404791232575, -87.67800335401654)",D40E7F30-C6F1-45FD-80D2-50AEDBAAF4A3,Simone,Bianconi,Simone Bianconi
2,2018-09-09 10:15:26,"(42.044888996511816, -87.67779573069726)",537DF5B3-48B8-4C86-A39F-5DE5DFEE19EC,Aaron,Stone,Aaron Stone
3,2018-09-09 14:37:13,"(41.99412885968501, -87.75289160301857)",8E710E79-A370-42B9-9D2C-ECA8623C1F3B,Amy,Angarita,Amy Angarita
4,2018-09-10 08:54:50,"(42.0569856260738, -87.67678917249012)",5899504E-1461-48DE-9ACC-FB9F2A1FDAF8,Spencer,Carlson,Spencer Carlson
5,2018-09-10 10:55:11,"(42.058397401163404, -87.67878203660965)",FDFE420C-04ED-47D6-AADF-8909DD559659,Leesha,Maliakal,Leesha Maliakal
6,2018-09-11 13:29:16,"(42.057203727130585, -87.67523330082763)",0DD045E1-0166-461B-B8CF-EA57DBFB5AAD,Hexia,Guo,Hexia Guo
7,2018-09-12 08:46:50,"(42.05846362591292, -87.68353601047232)",25FF4B97-71BF-4BB3-A701-A6937D8DDF9A,Anna,Deng,Anna Deng
8,2018-09-12 13:11:04,"(42.058325622770916, -87.67319495558446)",C5C548C5-3329-4DD5-9A08-C2937820E0B6,Hanlin,Li,Hanlin Li
9,2018-09-14 22:10:47,"(42.04979119826299, -87.6820053812933)",9A9310F7-B69A-414F-803D-CF2153014C25,Savanna,Ruiz,Savanna Ruiz


In [30]:
user_map_markers = user_last_locations_df[['name', 'vendorId', 'location', 'local_timestamp']].to_dict('records')
user_locations = [user['location'] for user in user_map_markers]

info_box_template = """
<dl>
<dt>Name</dt><dd>{name}</dd>
<dt>VendorId</dt><dd>{vendorId}</dd>
<dt>Timestamp</dt><dd>{local_timestamp}</dd>
</dl>
"""
user_info = [info_box_template.format(**user) for user in user_map_markers]

marker_layer = gmaps.marker_layer(user_locations, info_box_content=user_info)
fig = gmaps.figure()
fig.add_layer(marker_layer)
fig

Figure(layout=FigureLayout(height='420px'))