In [3]:
import requests
import json
import pandas as pd
import numpy as np
import datetime

# Load in all data
### Read in data from database and configure column types.

In [21]:
# URL and header for Parse
base_url = 'https://dtr-les.herokuapp.com/parse/classes/'
header = {'X-Parse-Application-Id': 'PkngqKtJygU9WiQ1GXM9eC0a17tKmioKKmpWftYr'}
data = {'limit': '1000'}

# hotspots
resp = requests.get(base_url + 'hotspot', headers=header, data=data)
hotspots = pd.DataFrame(resp.json()['results'])

hotspot_string_cols = ['objectId', 'vendorId', 'archiver', 'locationCommonName', \
                       'tag', 'submissionMethod', 'beaconId']
hotspots['createdAt'] = (pd.to_datetime(hotspots['createdAt']).astype(np.int64) // 10**9) + hotspots['gmtOffset']
hotspots['updatedAt'] = (pd.to_datetime(hotspots['updatedAt']).astype(np.int64) // 10**9) + hotspots['gmtOffset']
hotspots[hotspot_string_cols] = hotspots[hotspot_string_cols].astype(str)

# sent notifications
resp = requests.get(base_url + 'notificationSent', headers=header, data = {'limit': '1000'})
notification_sent = pd.DataFrame(resp.json()['results'])

notification_sent_string_cols = ['objectId', 'vendorId', 'hotspotId', 'notificationString']
notification_sent['createdAt'] = (pd.to_datetime(notification_sent['createdAt']).astype(np.int64) // 10**9) + \
                                  notification_sent['gmtOffset']
notification_sent['updatedAt'] = (pd.to_datetime(notification_sent['updatedAt']).astype(np.int64) // 10**9) + \
                                  notification_sent['gmtOffset']
notification_sent[notification_sent_string_cols] = notification_sent[notification_sent_string_cols].astype(str)

# notification responses
resp = requests.get(base_url + 'pingResponse', headers=header, data = {'limit': '1000'})
notification_responses = pd.DataFrame(resp.json()['results'])

notification_responses_string_cols = ['objectId', 'hotspotId', 'vendorId', 'question', 'response', 'tag']
notification_responses['createdAt'] = (pd.to_datetime(notification_responses['createdAt']).astype(np.int64) \
                                       // 10**9) + notification_responses['gmtOffset']
notification_responses['updatedAt'] = (pd.to_datetime(notification_responses['updatedAt']).astype(np.int64) \
                                       // 10**9) + notification_responses['gmtOffset']
notification_responses[notification_responses_string_cols] = \
    notification_responses[notification_responses_string_cols].astype(str)

# users
resp = requests.get(base_url + 'user', headers=header, data = {'limit': '1000'})
users = pd.DataFrame(resp.json()['results'])

user_string_cols = ['objectId', 'firstName', 'lastName', 'vendorId', \
                    'firstPreference', 'secondPreference', 'thirdPreference', 'fourthPreference']
users['createdAt'] = (pd.to_datetime(users['createdAt']).astype(np.int64) // 10**9) + (-18000)
users['updatedAt'] = (pd.to_datetime(users['updatedAt']).astype(np.int64) // 10**9) + (-18000)
users[user_string_cols] = users[user_string_cols].astype(str)

### Select data between start and end time of study

In [22]:
# declare start and end times 
start_time_date = pd.to_datetime('02-02-2017 16:00:00')
end_time_date = pd.to_datetime('02-10-2017 00:00:00')

start_time = (start_time_date - datetime.datetime(1970,1,1)).total_seconds()
end_time = (end_time_date - datetime.datetime(1970,1,1)).total_seconds()

# subset each data frame
hotspots = hotspots[(hotspots.createdAt >= start_time) & (hotspots.createdAt <= end_time)]
notification_sent = notification_sent[(notification_sent.createdAt >= start_time) & \
                                      (notification_sent.createdAt <= end_time)]
notification_responses = notification_responses[(notification_responses.createdAt >= start_time) & \
                                                (notification_responses.createdAt <= end_time)]
users = users[(users.createdAt >= start_time) & (users.createdAt <= end_time)]

# Compute metrics from study

### Basic demographic info from study

In [23]:
print('Study Length: ' + str((end_time_date - start_time_date)))
print('Number of users: ' + str(len(users)))
print('Male: 2, Female: 6')

Study Length: 7 days 08:00:00
Number of users: 8
Male: 2, Female: 6


### Number regions, notifications, and responses

In [24]:
kapil_vendor_id = 'F1AE595D-F244-4367-8744-27CA60450F0D'
total_regions_count = len(hotspots)
premarked_regions_count = len(hotspots[(hotspots.locationCommonName != '') | \
                                 (hotspots.vendorId == kapil_vendor_id)])
usermarked_regions_count = total_regions_count - premarked_regions_count

usermarked_regions_agg = hotspots[(hotspots.locationCommonName == '') & \
                                 (hotspots.vendorId != kapil_vendor_id)].groupby('tag').size()

print('Number regions: ' + str(total_regions_count))
print('Number Regions pre-marked: ' + str(premarked_regions_count))
print('Number User-marked regions: ' + str(usermarked_regions_count))
print('Number User-marked regions by category: ' + str(dict(usermarked_regions_agg)))

Number regions: 5
Number Regions pre-marked: 5
Number User-marked regions: 0
Number User-marked regions by category: {}


In [25]:
invalid_responses = ['com.apple.UNNotificationDefaultActionIdentifier', 
                     'com.apple.UNNotificationDismissActionIdentifier']
cleaned_notification_responses = notification_responses[~notification_responses.response.isin(invalid_responses)]
notifications_responded_to = pd.merge(notification_sent[['hotspotId', 'vendorId']], 
                                      cleaned_notification_responses[['hotspotId', 'vendorId']], how='inner')
notification_merged = pd.merge(notification_sent, cleaned_notification_responses, \
                               on=['hotspotId', 'vendorId'], how='inner')
notification_merged['timediff'] = notification_merged['createdAt_y'] - notification_merged['createdAt_x']

print('Number notifications sent: ' + str(len(notification_sent)))
print('Number notifications responded to: ' + str(len(notifications_responded_to)))
print('Notification response rate: ' + str(len(notifications_responded_to) / len(notification_sent)))
print('Median notification response time in minutes: ' + str(notification_merged['timediff'].median() / 60))

Number notifications sent: 13
Number notifications responded to: 7
Notification response rate: 0.5384615384615384
Median notification response time in minutes: 6.866666666666666


In [26]:
print(notification_sent)

      createdAt  gmtOffset   hotspotId  \
15   1486392548     -21600  1Oe3VvnkuA   
79   1486136181     -21600  1Oe3VvnkuA   
335  1486136147     -21600  1Oe3VvnkuA   
438  1486122582     -21600  vE4k73uRkZ   
439  1486249240     -21600  vE4k73uRkZ   
440  1486326320     -21600  vE4k73uRkZ   
441  1486334800     -21600  vE4k73uRkZ   
442  1486388931     -21600  vE4k73uRkZ   
443  1486414081     -21600  vE4k73uRkZ   
444  1486486458     -21600  We2Kav6DYE   
445  1486486506     -21600  We2Kav6DYE   
446  1486487082     -21600  We2Kav6DYE   
447  1486500450     -21600  We2Kav6DYE   

                                    notificationString    objectId  \
15               Notified for beacon region sJlkSUIKVR  8KY0EorjFD   
79               Notified for beacon region sJlkSUIKVR  TBn5Ug8dIL   
335              Notified for beacon region sJlkSUIKVR  iun37TSbC5   
438  Notified for vE4k73uRkZ (42.05833, -87.683644)...  06RXIyw9GW   
439  Notified for vE4k73uRkZ (42.05833, -87.683644)...  NOX47

In [28]:
print(hotspots)

    archived archiver    beaconId   createdAt  gmtOffset  \
207    False           sJlkSUIKVR  1486472208     -21600   
229    False           sJlkSUIKVR  1486512346     -21600   
261    False                       1486512172     -18000   
269     True   system  sJlkSUIKVR  1486392950     -21600   
289     True   system              1486468907     -18000   

                                                  info  \
207  {'valueright': '', 'objectleft': '', 'colorlef...   
229  {'frosted': '', 'plain': '', 'markercolor': ''...   
261  {'event': '', 'manypeople': '', 'seatingnearpo...   
269  {'valueright': '', 'objectleft': '', 'colorlef...   
289  {'event': '', 'manypeople': '', 'seatingnearpo...   

                                              location locationCommonName  \
207  {'latitude': 42.05697360918117, '__type': 'Geo...                      
229  {'latitude': 42.05633193258387, '__type': 'Geo...                      
261  {'latitude': 42.05833, '__type': 'GeoPoint', '...     

In [13]:
print(notifications_responded_to)
print()

    hotspotId                              vendorId
0  1Oe3VvnkuA  57DB09A8-068E-4315-B208-D82E24BC7F6D
1  vE4k73uRkZ  F1AE595D-F244-4367-8744-27CA60450F0D
2  vE4k73uRkZ  F1AE595D-F244-4367-8744-27CA60450F0D
3  vE4k73uRkZ  F1AE595D-F244-4367-8744-27CA60450F0D
4  vE4k73uRkZ  F1AE595D-F244-4367-8744-27CA60450F0D


### Compute average percentage of region info filled

In [10]:
def proportion_filled(info_dict):
    """
    Compute the amount of a info_dict that is filled (i.e. value(key) != '')
    
    Input:
        info_dict (dict): dictionary containing info
       
    Output: 
        (float): proportion of info_dict filled
    """
    n_keys = len(info_dict.keys())
    used_keys = 0
    
    for key, value in info_dict.items():
        if value != '':
            used_keys += 1
    
    return used_keys / n_keys

proportions_for_hotspots = [proportion_filled(x) for x in hotspots['info']]
avg_filled = np.average([x for x in proportions_for_hotspots if x > 0])

print('Average number of entries filled in info: ' + str(avg_filled))

Average number of entries filled in info: nan


  ret = ret.dtype.type(ret / rcount)
