In [9]:
import requests
import json
import pandas as pd
import numpy as np
import datetime

from __future__ import division

# Load in all data
### Read in data from database and configure column types.

In [10]:
# URL and header for Parse
# SECURITY NOTE(review): the application id and REST key below are committed in
# plain text. They should be rotated and loaded from the environment or a
# secrets store before this notebook is shared.
base_url = 'https://api.parse.com/1/classes/'
header = {'X-Parse-Application-Id': 'PkngqKtJygU9WiQ1GXM9eC0a17tKmioKKmpWftYr',
          'X-Parse-REST-API-Key': 'ptZAL499EEcmwIrghLTyg3IDB2jqHA3vV4AFp0Bh'}
data = {'limit': '1000'}


def load_parse_class(class_name, string_cols, offset_col='gmtOffset', fixed_offset=None):
    """
    Download one Parse class and normalise its column types.

    Input:
        class_name (str): name appended to base_url to build the request URL
        string_cols (list of str): columns to cast to str
        offset_col (str): column whose values (seconds) are added to the
            createdAt/updatedAt epoch seconds; ignored when fixed_offset is given
        fixed_offset (int or None): constant number of seconds to add instead
            of the per-row offset_col values

    Output:
        (pandas.DataFrame): the 'results' of the response, with createdAt and
            updatedAt converted to offset-shifted epoch seconds and
            string_cols cast to str
    """
    # The limit must travel in the query string of the GET request; passing it
    # via `data=` would put it in the request body instead.
    resp = requests.get(base_url + class_name, headers=header, params=data)
    # Fail loudly on an HTTP error instead of hitting a KeyError on 'results'.
    resp.raise_for_status()
    frame = pd.DataFrame(resp.json()['results'])

    offset = frame[offset_col] if fixed_offset is None else fixed_offset
    for time_col in ('createdAt', 'updatedAt'):
        # nanoseconds since the epoch -> whole seconds, then shift by the offset
        frame[time_col] = (pd.to_datetime(frame[time_col]).astype(np.int64) // 10**9) + offset
    frame[string_cols] = frame[string_cols].astype(str)
    return frame


# hotspots
hotspot_string_cols = ['objectId', 'vendorId', 'archiver', 'locationCommonName',
                       'tag', 'submissionMethod', 'beaconId']
hotspots = load_parse_class('hotspot', hotspot_string_cols)

# sent notifications
notification_sent_string_cols = ['objectId', 'vendorId', 'hotspotId', 'notificationString']
notification_sent = load_parse_class('notificationSent', notification_sent_string_cols)

# notification responses
notification_responses_string_cols = ['objectId', 'hotspotId', 'vendorId', 'question', 'response', 'tag']
notification_responses = load_parse_class('pingResponse', notification_responses_string_cols)

# users
# User rows are shifted by a constant -18000 s (-5 h) rather than a per-row
# gmtOffset column -- presumably the user class has no such column; confirm.
user_string_cols = ['objectId', 'firstName', 'lastName', 'vendorId',
                    'firstPreference', 'secondPreference', 'thirdPreference', 'fourthPreference']
users = load_parse_class('user', user_string_cols, fixed_offset=-18000)

### Select data between start and end time of study

In [11]:
# Boundaries of the study window
start_time_date = pd.to_datetime('12-05-2016 16:00:00')
end_time_date = pd.to_datetime('12-09-2016 00:00:00')

# Express both boundaries as seconds elapsed since 1970-01-01 so they can be
# compared with the numeric createdAt columns
unix_epoch = datetime.datetime(1970, 1, 1)
start_time = (start_time_date - unix_epoch).total_seconds()
end_time = (end_time_date - unix_epoch).total_seconds()

# Keep only rows created inside the window (both boundaries included)
hotspots = hotspots[hotspots.createdAt.between(start_time, end_time)]
notification_sent = notification_sent[notification_sent.createdAt.between(start_time, end_time)]
notification_responses = notification_responses[
    notification_responses.createdAt.between(start_time, end_time)
]
users = users[users.createdAt.between(start_time, end_time)]

# Compute metrics from study

### Basic demographic info from study

In [12]:
# Summary lines for the study; the gender split is a fixed literal, it is not
# derived from the users frame.
summary_lines = [
    'Study Length: ' + str(end_time_date - start_time_date),
    'Number of users: ' + str(len(users)),
    'Male: 2, Female: 6',
]
for summary_line in summary_lines:
    print(summary_line)

Study Length: 3 days 08:00:00
Number of users: 8
Male: 2, Female: 6


### Number of regions, notifications, and responses

In [19]:
kapil_vendor_id = 'F1AE595D-F244-4367-8744-27CA60450F0D'

# A region is treated as pre-marked when it carries a common name or was
# created by the vendor id above; all remaining regions are user-marked.
has_common_name = hotspots.locationCommonName != ''
made_by_kapil = hotspots.vendorId == kapil_vendor_id
is_premarked = has_common_name | made_by_kapil

total_regions_count = len(hotspots)
premarked_regions_count = len(hotspots[is_premarked])
usermarked_regions_count = total_regions_count - premarked_regions_count

# Number of user-marked regions per tag
usermarked_regions_agg = hotspots[~is_premarked].groupby('tag').size()

print('Number regions: ' + str(total_regions_count))
print('Number Regions pre-marked: ' + str(premarked_regions_count))
print('Number User-marked regions: ' + str(usermarked_regions_count))
print('Number User-marked regions by category: ' + str(dict(usermarked_regions_agg)))

Number regions: 13
Number Regions pre-marked: 11
Number User-marked regions: 2
Number User-marked regions by category: {'guestevent': 2}


In [14]:
# Response identifiers that are excluded before computing the metrics
invalid_responses = ['com.apple.UNNotificationDefaultActionIdentifier',
                     'com.apple.UNNotificationDismissActionIdentifier']
is_valid_response = ~notification_responses.response.isin(invalid_responses)
cleaned_notification_responses = notification_responses[is_valid_response]

# Sent notifications and responses are matched on these two columns.
# NOTE(review): an inner join pairs every sent row with every response that
# shares the same keys, so repeated notifications for one (hotspot, vendor)
# pair could inflate the counts below -- confirm against the data.
join_keys = ['hotspotId', 'vendorId']
notifications_responded_to = notification_sent[join_keys].merge(
    cleaned_notification_responses[join_keys], how='inner')

# Full join keeps both createdAt columns (suffixed _x for sent, _y for response)
notification_merged = notification_sent.merge(
    cleaned_notification_responses, on=join_keys, how='inner')
notification_merged['timediff'] = notification_merged['createdAt_y'] - notification_merged['createdAt_x']

sent_count = len(notification_sent)
responded_count = len(notifications_responded_to)
median_timediff = notification_merged['timediff'].median()

print('Number notifications sent: ' + str(sent_count))
print('Number notifications responded to: ' + str(responded_count))
print('Notification response rate: ' + str(responded_count / sent_count))
print('Median notification response time in minutes: ' + str(median_timediff / 60))

Number notifications sent: 56
Number notifications responded to: 16
Notification response rate: 0.285714285714
Median notification response time in minutes: 1.06666666667


### Compute average percentage of region info filled

In [15]:
def proportion_filled(info_dict):
    """
    Compute the proportion of entries in info_dict that are filled,
    i.e. whose value is not the empty string.

    Input:
        info_dict (dict): dictionary containing info

    Output:
        (float): proportion of info_dict filled, between 0.0 and 1.0;
            0.0 when info_dict is empty or None
    """
    # Nothing to fill: avoid dividing by zero on an empty (or missing) dict.
    if not info_dict:
        return 0.0

    filled_count = sum(1 for value in info_dict.values() if value != '')

    # Explicit float conversion so the result does not depend on
    # `from __future__ import division` having been executed in another cell.
    return float(filled_count) / len(info_dict)

# Filled proportion of each hotspot's info dictionary
proportions_for_hotspots = [proportion_filled(info) for info in hotspots['info']]

# Average over the hotspots that have at least one entry filled; hotspots with
# a proportion of 0 are left out.
nonzero_proportions = [share for share in proportions_for_hotspots if share > 0]
avg_filled = np.mean(nonzero_proportions)

print('Average number of entries filled in info: ' + str(avg_filled))

Average number of entries filled in info: 0.286458333333
