In [2]:
from google.cloud import bigquery

import pathlib # __file__ isn't recognized in Jupyter, so we need this to get the root path
path = pathlib.Path.cwd()
PROJ_ROOT = path.parent

accountkeyfile = str(PROJ_ROOT.parent / 'credentials\GoogleServiceAccountKeyFile.json')
client = bigquery.Client.from_service_account_json(accountkeyfile)

#### Accounts Per Device

Device determined by device.advertising_id

In [3]:
query = """
SELECT
    device.advertising_id AS device_id,
    COUNT(DISTINCT user_properties.value.string_value) AS Usernames,
    COUNT(DISTINCT user_pseudo_id) AS Pseudo_IDs
FROM
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
WHERE 
    ((SELECT event_params.value.string_value
        FROM UNNEST(event_params) AS event_params
        WHERE event_params.key = 'firebase_screen_class') = 'HomeActivity')
GROUP BY
    device.advertising_id
ORDER BY 
    COUNT(DISTINCT user_properties.value.string_value) DESC
"""
df = client.query(query).to_dataframe()
df.head()

Unnamed: 0,device_id,Usernames,Pseudo_IDs
0,733bdd53-745a-4e95-b32e-9595690aaf52,22,31
1,788dc562-cc05-44c0-bc26-7e24ff3c3768,17,46
2,42479d6a-a2e2-4530-be2f-1cc0a77245f0,3,4
3,2f84376a-841a-4d6f-83fc-2d339294e02d,3,3
4,06cc47f7-954d-4996-96e5-c449e268f487,2,2


### Total # of Events by Phone model

In [5]:
query = """
SELECT
    COUNT(device.mobile_os_hardware_model) AS events,
    user_properties.value.string_value,
    device.mobile_os_hardware_model AS model
FROM
    `heycharlie-ada47.analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
WHERE
    event_date > '20181109'
    AND user_properties.value.string_value in ('+vinny', 'shad christie', 'driverseat', 'blonco', 'doyle coleman', 'joeltortega')
GROUP BY
    device.mobile_os_hardware_model,
    user_properties.value.string_value
ORDER BY
    COUNT(device.mobile_os_hardware_model) DESC
"""
df = client.query(query).to_dataframe()
df.head()

Unnamed: 0,events,string_value,model
0,18129,+vinny,Pixel


### Total # of Events by Username

In [9]:
query = """
SELECT
    COUNT(user_properties.value.string_value) AS events,
    user_properties.value.string_value AS Username
FROM
    `heycharlie-ada47.analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
WHERE
    event_date > '20181109'
    AND user_properties.value.string_value in ('+vinny', 'shad christie', 'driverseat', 'blonco', 'doyle coleman', 'joeltortega')
GROUP BY
    user_properties.value.string_value
ORDER BY
    COUNT(user_properties.value.string_value) DESC
"""
df = client.query(query).to_dataframe()
df.head()

Unnamed: 0,events,Username
0,17819,+vinny


### # of Notification Events by Username

TODO - Break out notifications by type (positive reinforcement 1x/day, texts, calls)

TODO - Look at responses to notifications

In [12]:
query = """
SELECT
    user_properties.value.string_value as username,
    COUNT(*) AS count
FROM
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties,
    UNNEST(event_params) AS event_params
WHERE 
    event_params.key = 'notification_event'
    AND event_date > '20190101'
    AND user_properties.value.string_value in ('Shad Christie', 'Driverseat', 'Blonco', 'Doyle Coleman', 'JoelTOrtega')
GROUP BY
    user_properties.value.string_value
ORDER BY
    COUNT(*) DESC
"""
df = client.query(query).to_dataframe()
df.head(5)

Unnamed: 0,username,count
0,Shad Christie,231
1,Doyle Coleman,88
2,Driverseat,3
3,Blonco,1
4,JoelTOrtega,1


In [10]:
weekday_dict = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}
list = [0, 2, 4, 1]
list.sort()
weekday_dict[2]

'Wed'

In [6]:
string = ', '.join(weekday_dict[list])
print(string)

TypeError: unhashable type: 'list'

In [14]:
query = """
SELECT
    event_name AS event_name,
    COUNT(event_name) AS count,
    user_properties.value.string_value
FROM
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties,
    UNNEST(event_params) AS event_params
WHERE 
    event_params.key = 'notification_event'
    AND user_properties.value.string_value in ('Shad Christie', 'Driverseat', 'Blonco', 'Doyle Coleman', 'JoelTOrtega')
GROUP BY
    event_name,
    user_properties.value.string_value
ORDER BY
    event_name
"""
df = client.query(query).to_dataframe()
df.head(50)

Unnamed: 0,event_name,count,string_value
0,daily_task_notification,1,Driverseat
1,daily_task_notification,1,Blonco
2,daily_task_notification,1,Doyle Coleman
3,daily_task_notification,1,JoelTOrtega
4,daily_task_notification,1,Shad Christie
5,first_week_questionnaire_answer,1,Shad Christie
6,first_week_questionnaire_cancel,1,Driverseat
7,first_week_questionnaire_open,1,Shad Christie
8,first_week_questionnaire_open,1,Driverseat
9,friend_scoring_notification,8,Shad Christie


### Daily Count of Risky Text Events

In [19]:
query = """
SELECT
    event_date AS Date,
    SUM(CASE WHEN event_name = "risky_sent_sms_warning_event" THEN 1 ELSE 0 END) AS risky_sent_sms,
    SUM(CASE WHEN event_name = "risky_sent_sms_warning_notification" THEN 1 ELSE 0 END) AS risky_sent_notification,
    SUM(CASE WHEN event_name = "risky_sms_event" THEN 1 ELSE 0 END) AS risky_in_sms,
    SUM(CASE WHEN event_name = "risky_sms_warning_notification" THEN 1 ELSE 0 END) AS risky_in_notification,
    COUNT(event_name) AS events
FROM 
    (SELECT DISTINCT event_date, event_name, event_server_timestamp_offset
        FROM `analytics_153084895.events_*`)
WHERE
    event_name = "risky_sent_sms_warning_event"
    OR event_name = "risky_sent_sms_warning_notification"
    OR event_name = "risky_sms_event"
    OR event_name = "risky_sms_warning_notification"
GROUP BY
    event_date
ORDER BY
    event_date DESC
"""
df = client.query(query).to_dataframe()
df.head()

Unnamed: 0,Date,risky_sent_sms,risky_sent_notification,risky_in_sms,risky_in_notification,event
0,20190123,1,1,1,1,4
1,20190119,6,4,6,4,20
2,20190118,0,0,1,1,2
3,20190117,4,3,7,7,21
4,20190116,3,3,5,3,14


### Daily Risky Events and Notifications, by User

In [None]:
query = """
SELECT
    user_properties.value.string_value AS username,
    SUM(CASE WHEN event_name = "text_message_received" THEN 1 ELSE 0 END) AS in_sms,
    SUM(CASE WHEN event_name = "text_message_sent" THEN 1 ELSE 0 END) AS sent_sms,
    SUM(CASE WHEN event_name = "risky_sent_sms_warning_event" THEN 1 ELSE 0 END) AS risky_sent_sms,
    SUM(CASE WHEN event_name = "risky_sent_sms_warning_notification" THEN 1 ELSE 0 END) AS risky_sent_notification,
    SUM(CASE WHEN event_name = "risky_sms_event" THEN 1 ELSE 0 END) AS risky_in_sms,
    SUM(CASE WHEN event_name = "risky_sms_warning_notification" THEN 1 ELSE 0 END) AS risky_in_notification
FROM 
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
WHERE
    user_properties.value.string_value in ('Shad Christie', 'Driverseat', 'Blonco', 'Doyle Coleman', 'JoelTOrtega', 'Pinky618 ')
    AND (event_name = "risky_sent_sms_warning_event"
        OR event_name = "risky_sent_sms_warning_notification"
        OR event_name = 'text_message_received'
        OR event_name = 'text_message_sent'
        OR event_name = "risky_sms_event"
        OR event_name = "risky_sms_warning_notification")
GROUP BY
    user_properties.value.string_value
"""
df = client.query(query).to_dataframe()
df.head(100)

In [5]:
query = """
SELECT
    event_date AS Date,
    user_properties.value.string_value AS username,
    SUM(CASE WHEN event_name = "text_message_received" THEN 1 ELSE 0 END) AS in_sms,
    SUM(CASE WHEN event_name = "text_message_sent" THEN 1 ELSE 0 END) AS sent_sms,
    SUM(CASE WHEN event_name = "risky_sent_sms_warning_event" THEN 1 ELSE 0 END) AS risky_sent_sms,
    SUM(CASE WHEN event_name = "risky_sent_sms_warning_notification" THEN 1 ELSE 0 END) AS risky_sent_notification,
    SUM(CASE WHEN event_name = "risky_sms_event" THEN 1 ELSE 0 END) AS risky_in_sms,
    SUM(CASE WHEN event_name = "risky_sms_warning_notification" THEN 1 ELSE 0 END) AS risky_in_notification
FROM 
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
WHERE
    user_properties.value.string_value in ('Shad Christie', 'Driverseat', 'Blonco', 'Doyle Coleman', 'JoelTOrtega', 'Pinky618 ')
    AND (event_name = "risky_sent_sms_warning_event"
        OR event_name = "risky_sent_sms_warning_notification"
        OR event_name = 'text_message_received'
        OR event_name = 'text_message_sent'
        OR event_name = "risky_sms_event"
        OR event_name = "risky_sms_warning_notification")
GROUP BY
    event_date, user_properties.value.string_value
ORDER BY
    event_date DESC
"""
df = client.query(query).to_dataframe()
df.head(100)

Unnamed: 0,username,in_sms,sent_sms,risky_sent_sms,risky_sent_notification,risky_in_sms,risky_in_notification
0,JoelTOrtega,29,31,0,0,0,0
1,Shad Christie,149,66,0,0,0,0
2,Pinky618,167,63,6,2,7,2
3,Doyle Coleman,53,4,0,0,0,0


### Days of App Use

In [None]:
query = """
SELECT
    event_date AS Date,
    SUM(CASE WHEN event_name = "risky_sent_sms_warning_event" THEN 1 ELSE 0 END) AS risky_sent_sms,
    SUM(CASE WHEN event_name = "risky_sent_sms_warning_notification" THEN 1 ELSE 0 END) AS risky_sent_notification,
    SUM(CASE WHEN event_name = "risky_sms_event" THEN 1 ELSE 0 END) AS risky_in_sms,
    SUM(CASE WHEN event_name = "risky_sms_warning_notification" THEN 1 ELSE 0 END) AS risky_in_notification,
    user_properties.value.string_value AS username,
    COUNT(event_name) AS total_risly_sms_events
FROM 
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
WHERE
    user_properties.value.string_value <> "null"
    AND (event_name = "risky_sent_sms_warning_event"
        OR event_name = "risky_sent_sms_warning_notification"
        OR event_name = "risky_sms_event"
        OR event_name = "risky_sms_warning_notification")
GROUP BY
    event_date, user_properties.value.string_value
ORDER BY
    event_date DESC
"""
df = client.query(query).to_dataframe()
df.head(10)

### Days of App Use

In [24]:
query = """
SELECT
    MIN(event_date) AS Start_Date,
    MAX(event_date) AS Max_Date,
    ROUND((MAX(CAST(event_timestamp AS INT64)) - MIN(CAST(event_timestamp AS INT64))) / (86400 * 1000000),1) AS Installed_Days,
    user_properties.value.string_value AS Username,
    MIN(user_first_touch_timestamp) = MIN(event_timestamp) AS first_touch_equal_first_event,
    MIN(user_first_touch_timestamp) AS first_touch_timestamp,
    MIN(event_timestamp) AS min_event_timestamp,
    MAX(event_timestamp) AS max_timestamp
FROM
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
WHERE
    user_first_touch_timestamp > 20181110
    AND user_properties.value.string_value in ('Shad Christie', 'Driverseat', 'Blonco', 'Doyle Coleman', 'JoelTOrtega')
GROUP BY
    user_properties.value.string_value
ORDER BY
    (MAX(CAST(event_timestamp AS INT64)) - MIN(CAST(event_timestamp AS INT64))) DESC
"""
df = client.query(query).to_dataframe()
df.head(5)

Unnamed: 0,Start_Date,Max_Date,Installed_Days,Username,first_touch_equal_first_event,first_touch_timestamp,min_event_timestamp,max_timestamp
0,20190311,20190329,17.9,Shad Christie,False,1552331570671000,1552331642821000,1553880640483288
1,20190319,20190329,10.1,Doyle Coleman,False,1553005169571000,1553005361651000,1553877722124779
2,20190319,20190328,8.8,Driverseat,False,1553005074463000,1553005140856000,1553769240275000
3,20190319,20190325,6.5,Blonco,False,1553004947390000,1553005035909000,1553567920554001
4,20190319,20190319,0.4,JoelTOrtega,False,1553004788248000,1553004891710000,1553041245108001


### Installs

In [2]:
query = """
SELECT
    MIN(event_date) AS Date,
    MIN(event_timestamp) AS min_event_timestamp,
    MIN(user_first_touch_timestamp) AS Install,
    user_properties.value.string_value AS Username,
    geo.region AS state
FROM
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
GROUP BY
    user_properties.value.string_value,
    geo.region
ORDER BY 
    MIN(event_date) DESC
"""
df = client.query(query).to_dataframe()
df.head(40)

Unnamed: 0,Date,min_event_timestamp,Install,Username,state
0,20190322,1553258436195076,1541891267037000,emily,Rhode Island
1,20190321,1553174214210000,1552519733657000,fred13,Washington
2,20190320,1553113169805000,1553112810129000,Pinky618,New York
3,20190319,1553004891710000,1553004788248000,JoelTOrtega,New York
4,20190319,1553005140856000,1553005074463000,Driverseat,New York
5,20190319,1553005035909000,1553004947390000,Blonco,New York
6,20190319,1553005432822000,1552519733657000,fred13,New York
7,20190319,1553005361651000,1553005169571000,Doyle Coleman,New York
8,20190317,1552827197414000,1551975647815000,vc,Rhode Island
9,20190316,1552778492798000,1551975647815000,vc,Connecticut


### Event Count For Date Range

TABLE_DATE_RANGE is legacy SQL and seems to only work in the BigQuery web portal

In [16]:
query = """
SELECT
    COUNT(event_name) AS NotificationEvents,
    event_name,
    event_params.value.string_value AS string_value
FROM
    `analytics_153084895.*`,
    UNNEST(event_params) AS event_params
WHERE
    _TABLE_SUFFIX IN ('events_20190122', 'events_20191231')
GROUP BY
    event_name, string_value
ORDER BY
    NotificationEvents DESC
"""

# This is the legacy SQL
# query = """ 
# SELECT
#     COUNT(event_name) AS NotificationEvents,
#     event_name,
#     event_params.value.string_value AS string_value
# FROM
#     TABLE_DATE_RANGE( analytics_153084895.events_,
#         TIMESTAMP('2018-01-01'),
#         TIMESTAMP('2018-12-31'))
# GROUP BY
#     event_name, string_value
# ORDER BY
#     NotificationEvents DESC
# """

df = client.query(query).to_dataframe()
df.head(10)

Unnamed: 0,NotificationEvents,event_name,string_value
0,449,text_message_received,app
1,449,text_message_received,
2,448,reviver_scimitar_already_running,app
3,400,text_message_received,sms
4,252,data_sync_friend_event_3_tries,app
5,223,text_message_received,received
6,223,text_message_received,SMSMonitoringService
7,194,text_message_sent,app
8,194,text_message_sent,sms
9,177,text_message_received,BroadcastReceiver


### First Week Feedback

In [18]:
query = """
SELECT
    event_date AS date,
    event_timestamp AS timestamp,
    user_properties.value.string_value as username,
    (SELECT event_params.value.string_value
    FROM UNNEST(event_params) AS event_params
    WHERE event_params.key = 'face') AS face,
    (SELECT event_params.value.string_value
    FROM UNNEST(event_params) AS event_params
    WHERE event_params.key = 'feedback') AS feedback
FROM
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
WHERE 
    event_name = 'first_week_questionnaire_answer'
    AND user_properties.value.string_value IS NOT NULL
ORDER BY
    event_timestamp DESC
"""
df = client.query(query).to_dataframe()
df

Unnamed: 0,date,timestamp,username,face,feedback
0,20190115,1547606474382004,+vinny,nuetral,Gabagaba Goo!
1,20190108,1546959287604080,vinnov10,nuetral,Fg\n
2,20190107,1546917550772016,test23,happy,
3,20190107,1546914239980004,vinnov10,happy,Ffv
4,20190107,1546906391776069,vinnov10,happy,Great!
5,20190104,1546639224053006,+vinny,sad,Same day as reinstall\n
6,20190102,1546470188021004,+vinny,nuetral,"Just reinstalled, old login, already got one o..."
7,20190101,1546394111869010,mckay68,happy,
8,20181205,1544047876047019,+vinny,happy,Test entry
9,20181128,1543445020495005,Upstate518,happy,


### Onboarding Events

In [22]:
query = """
SELECT
    t1.event_date AS Date,
    t1.event_name AS event_name,
    t1.user_pseudo_id AS user_pseudo_id,
    t2.username as Username,
    t1.event_timestamp AS timestamp,
    t1.event_previous_timestamp AS previous_timestamp,
    t1.user_first_touch_timestamp AS first_touch_timestamp,
    t2.done_timestamp AS done_timestamp,
    (SELECT event_params.value.string_value
        FROM UNNEST(t1.event_params) AS event_params
        WHERE event_params.key = 'firebase_screen_class') AS screen,
    (SELECT event_params.value.string_value
        FROM UNNEST(t1.event_params) AS event_params
        WHERE event_params.key = 'firebase_previous_class') AS prev_screen
FROM
    `analytics_153084895.events_*` as t1,
    UNNEST(t1.user_properties) AS user_properties
JOIN(
    SELECT
        MIN(event_timestamp) AS done_timestamp,
        user_properties.value.string_value AS username
    FROM
        `analytics_153084895.events_*`,
         UNNEST(user_properties) AS user_properties
    WHERE 
        ((SELECT event_params.value.string_value
            FROM UNNEST(event_params) AS event_params
            WHERE event_params.key = 'firebase_screen_class') = 'HomeActivity')
    GROUP BY
        user_properties.value.string_value) AS t2
ON t2.username = user_properties.value.string_value
WHERE
    t1.event_timestamp < t2.done_timestamp
    AND t1.event_name = 'screen_view'
    AND t2.username != 'null'
"""
df = client.query(query).to_dataframe()
df.head(10)

Unnamed: 0,Date,event_name,user_pseudo_id,Username,timestamp,previous_timestamp,first_touch_timestamp,done_timestamp,screen,prev_screen
0,20180909,screen_view,737f4d6f64fe8b7c9e00640bd7798484,test7,1536521600463002,1536521600430002,1536287952446000,1536521650806029,WelcomeActivity,WelcomeActivity
1,20180909,screen_view,737f4d6f64fe8b7c9e00640bd7798484,test7,1536521605509005,1536521605377005,1536287952446000,1536521650806029,LegalAbstractActivity,LegalAbstractActivity
2,20180909,screen_view,737f4d6f64fe8b7c9e00640bd7798484,test7,1536521607855008,1536521607794008,1536287952446000,1536521650806029,PermissionsActivity,PermissionsActivity
3,20180909,screen_view,737f4d6f64fe8b7c9e00640bd7798484,test7,1536521609414010,1536521609355010,1536287952446000,1536521650806029,NotificationsPermissionActivity,NotificationsPermissionActivity
4,20180909,screen_view,737f4d6f64fe8b7c9e00640bd7798484,test7,1536521610492012,1536521610430012,1536287952446000,1536521650806029,AboutYouActivity,AboutYouActivity
5,20180909,screen_view,737f4d6f64fe8b7c9e00640bd7798484,test7,1536521619324015,1536521619305015,1536287952446000,1536521650806029,PlacesMessageActivity,PlacesMessageActivity
6,20180909,screen_view,737f4d6f64fe8b7c9e00640bd7798484,test7,1536521620852017,1536521620814017,1536287952446000,1536521650806029,PlacesActivity,PlacesActivity
7,20180909,screen_view,737f4d6f64fe8b7c9e00640bd7798484,test7,1536521623226020,1536521623183020,1536287952446000,1536521650806029,RelationshipsMessageActivity,RelationshipsMessageActivity
8,20180909,screen_view,737f4d6f64fe8b7c9e00640bd7798484,test7,1536521624283022,1536521624244022,1536287952446000,1536521650806029,RelationshipsActivity,RelationshipsActivity
9,20180909,screen_view,737f4d6f64fe8b7c9e00640bd7798484,test7,1536521641298025,1536521641271025,1536287952446000,1536521650806029,QuestionsActivity,QuestionsActivity


### Onboarding Screen View Timing

In [23]:
query = """
SELECT
    t1.event_date AS Date,
    t1.event_name AS event_name,
    t1.user_pseudo_id AS user_pseudo_id,
    t2.username as Username,
    t1.event_timestamp AS timestamp,
    t1.event_previous_timestamp AS previous_timestamp,
    t1.user_first_touch_timestamp AS first_touch_timestamp,
    t2.done_timestamp AS done_timestamp,
    (SELECT event_params.value.string_value
        FROM UNNEST(t1.event_params) AS event_params
        WHERE event_params.key = 'firebase_previous_class') AS Screen
FROM
    `analytics_153084895.events_*` as t1,
    UNNEST(t1.user_properties) AS user_properties
JOIN(
    SELECT
        MIN(event_timestamp) AS done_timestamp,
        user_properties.value.string_value AS username
    FROM
        `analytics_153084895.events_*`,
        UNNEST(user_properties) AS user_properties
    WHERE 
        ((SELECT event_params.value.string_value
        FROM UNNEST(event_params) AS event_params
        WHERE event_params.key = 'firebase_screen_class') = 'HomeActivity')
        GROUP BY
            user_properties.value.string_value) AS t2
ON t2.username = user_properties.value.string_value
WHERE
    t1.event_timestamp < t2.done_timestamp
    AND t1.event_name = 'screen_view'
    AND t2.username != 'null'
    AND ((SELECT event_params.value.string_value
        FROM UNNEST(t1.event_params) AS event_params
        WHERE event_params.key = 'firebase_screen_class') != 
            (SELECT event_params.value.string_value
            FROM UNNEST(t1.event_params) AS event_params
            WHERE event_params.key = 'firebase_previous_class'))
"""
df = client.query(query).to_dataframe()
df.head(10)

Unnamed: 0,Date,event_name,user_pseudo_id,Username,timestamp,previous_timestamp,first_touch_timestamp,done_timestamp,Screen
0,20180918,screen_view,e055d13a04995d499cb582db0cdd9861,vinsep18,1537277652904038,1537277643534038,1537277456010000,1537277721949068,QuestionsActivity
1,20180918,screen_view,e055d13a04995d499cb582db0cdd9861,vinsep18,1537277667756043,1537277653957043,1537277456010000,1537277721949068,QuestionsActivity
2,20180918,screen_view,e055d13a04995d499cb582db0cdd9861,vinsep18,1537277680732049,1537277668983049,1537277456010000,1537277721949068,QuestionsActivity
3,20180918,screen_view,e055d13a04995d499cb582db0cdd9861,vinsep18,1537277695147055,1537277681927055,1537277456010000,1537277721949068,QuestionsActivity
4,20180918,screen_view,e055d13a04995d499cb582db0cdd9861,vinsep18,1537277710624061,1537277697357061,1537277456010000,1537277721949068,QuestionsActivity
5,20180918,screen_view,e055d13a04995d499cb582db0cdd9861,vinsep18,1537277721523067,1537277711793067,1537277456010000,1537277721949068,QuestionsActivity
6,20180918,screen_view,e055d13a04995d499cb582db0cdd9861,vinsep18,1537277492534010,1537277488612010,1537277456010000,1537277721949068,DexterActivity
7,20180918,screen_view,f29429561128e4ac35ace6435278b8f5,emily,1537276898440011,1537276890991011,1537276848519000,1537276933746019,DexterActivity
8,20180918,screen_view,e055d13a04995d499cb582db0cdd9861,vinsep18,1537277483984001,1537277459757001,1537277456010000,1537277721949068,SignupActivity
9,20180918,screen_view,e055d13a04995d499cb582db0cdd9861,vinsep18,1537277486506004,1537277484011004,1537277456010000,1537277721949068,WelcomeActivity


### Total Onboarding Time

In [28]:
query = """
SELECT
    MIN(event_date) AS Date,
    user_properties.value.string_value AS Username,
    MIN(event_timestamp) AS end_timestamp,
    MIN(user_first_touch_timestamp) AS first_touch_timestamp
FROM
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
WHERE 
    event_name = 'screen_view'
    AND ((SELECT event_params.value.string_value
        FROM UNNEST(event_params) AS event_params
        WHERE event_params.key = 'firebase_screen_class') = 'HomeActivity')
GROUP BY
    user_properties.value.string_value,
    (SELECT event_params.value.string_value
        FROM UNNEST(event_params) AS event_params
        WHERE event_params.key = 'firebase_screen_class')
"""
df = client.query(query).to_dataframe()
df.head(10)

Unnamed: 0,Date,Username,end_timestamp,first_touch_timestamp
0,20181008,vinoct8,1539031645606064,1539031438715000
1,20180806,test,1533604528897025,1533604300723000
2,20181112,Jake,1542042226682040,1542041638325000
3,20180928,vinsep27,1538144770585006,1538096669037000
4,20190110,hbickers,1547138320091091,1547137724581000
5,20181113,Rabbit,1542125087450034,1542123674261000
6,20181031,emily3,1541029311184099,1541029065409000
7,20190123,test3,1548298574814032,1542476516676000
8,20181214,soberhouse,1544824867119004,1544143545423000
9,20181002,vinoct2,1538502454136048,1538502319269000


### Post-Onboarding Screen View Timing

In [7]:
query = """
SELECT
    t1.event_date AS Date,
    t1.event_name AS event_name,
    t2.username as Username,
    t1.event_timestamp AS timestamp,
    t1.event_previous_timestamp AS previous_timestamp,
    t1.user_first_touch_timestamp AS first_touch_timestamp,
    (SELECT event_params.value.string_value
        FROM UNNEST(t1.event_params) AS event_params
        WHERE event_params.key = 'firebase_previous_class') AS Screen
FROM
    `analytics_153084895.events_*` as t1,
    UNNEST(t1.user_properties) AS user_properties
JOIN(
    SELECT
        MIN(event_timestamp) AS done_timestamp,
        user_properties.value.string_value AS username
    FROM
        `analytics_153084895.events_*`,
        UNNEST(user_properties) AS user_properties
    WHERE 
        ((SELECT event_params.value.string_value
        FROM UNNEST(event_params) AS event_params
        WHERE event_params.key = 'firebase_screen_class') = 'HomeActivity')
    GROUP BY
        user_properties.value.string_value) AS t2
ON t2.username = user_properties.value.string_value
WHERE
    t1.event_timestamp > t2.done_timestamp
    AND t1.event_name = 'screen_view'
    AND t2.username = 'Shad Christie'
    AND ((SELECT event_params.value.string_value
        FROM UNNEST(t1.event_params) AS event_params
        WHERE event_params.key = 'firebase_screen_class') != 
            (SELECT event_params.value.string_value
            FROM UNNEST(t1.event_params) AS event_params
            WHERE event_params.key = 'firebase_previous_class'))
"""
df = client.query(query).to_dataframe()
df.head(10)

Unnamed: 0,Date,event_name,Username,timestamp,previous_timestamp,first_touch_timestamp,Screen
0,20190311,screen_view,Shad Christie,1552338453287093,1552338442594093,1552331570671000,HomeActivity
1,20190311,screen_view,Shad Christie,1552338465420097,1552338453311097,1552331570671000,RelationshipsActivity
2,20190311,screen_view,Shad Christie,1552338500587102,1552338500374102,1552331570671000,RelationshipsActivity
3,20190311,screen_view,Shad Christie,1552338504215105,1552338500640105,1552331570671000,HomeActivity
4,20190311,screen_view,Shad Christie,1552338513947108,1552338504278108,1552331570671000,PlacesActivity
5,20190311,screen_view,Shad Christie,1552338500374101,1552338465421101,1552331570671000,QuestionsActivity
6,20190319,screen_view,Shad Christie,1553013666324011,1553013666126011,1552331570671000,SigninSignupActivity
7,20190319,screen_view,Shad Christie,1553014783652001,1553013666339001,1552331570671000,HomeActivity
8,20190319,screen_view,Shad Christie,1553014783911007,1553014783652007,1552331570671000,SigninSignupActivity
9,20190319,screen_view,Shad Christie,1553014787671015,1553014783944015,1552331570671000,HomeActivity


### Screen View Timing Data

In [25]:
query = """
SELECT
    (SELECT event_params.value.string_value
        FROM UNNEST(event_params) AS event_params
        WHERE event_params.key = 'firebase_previous_class') AS Screen,
    event_date AS Date,
    user_properties.value.string_value AS Username,
    event_timestamp AS end_timestamp,
    event_previous_timestamp AS start_timestamp
FROM
    `analytics_153084895.events_*`,
    UNNEST(user_properties) AS user_properties
WHERE 
    event_name = 'screen_view'
    AND ((SELECT event_params.value.string_value
        FROM UNNEST(event_params) AS event_params
        WHERE event_params.key = 'firebase_screen_class') != 
            (SELECT event_params.value.string_value
            FROM UNNEST(event_params) AS event_params
            WHERE event_params.key = 'firebase_previous_class'))
ORDER BY
    event_timestamp
"""
df = client.query(query).to_dataframe()
df.head(10)

Unnamed: 0,Screen,Date,Username,end_timestamp,start_timestamp
0,HomeActivity,20180711,,1531358521175000,1531257346796000
1,SigninSignupActivity,20180711,,1531358531292002,1531358521175002
2,SignupActivity,20180711,,1531358536305003,1531358531292003
3,SigninSignupActivity,20180711,,1531358536478004,1531358536305004
4,SignupActivity,20180711,,1531358937284006,1531358536478006
5,HomeActivity,20180712,,1531376260465000,1531259406844000
6,SigninSignupActivity,20180712,,1531376262537002,1531376260465002
7,SigninActivity,20180712,,1531376312750005,1531376262537005
8,HomeActivity,20180712,,1531376320627007,1531376312750007
9,SettingsActivity,20180712,,1531376325091009,1531376320627009


### SMS vs MMS

In [26]:
query = """
SELECT
    type as type,
    COUNT(username) AS events
FROM
    (SELECT
        (SELECT event_params.value.string_value
            FROM UNNEST(event_params) AS event_params
            WHERE event_params.key = 'type') AS type,
        event_date AS date,
        user_properties.value.string_value AS username,
        event_timestamp AS timestamp,
        event_previous_timestamp AS previous_timestamp
    FROM
        `analytics_153084895.events_*`,
         UNNEST(user_properties) AS user_properties
    WHERE 
        event_name = 'text_message_received' OR event_name = "risky_sent_sms_warning_event")
GROUP BY
    type
"""
df = client.query(query).to_dataframe()
df.head(10)

Unnamed: 0,type,events
0,sms,63343
1,mms,9665


### Time Between Texts

In [8]:
query = """
SELECT
    t1.event_date AS date,
    t1.event_name AS event_name,
    LAG(t1.event_timestamp) OVER (PARTITION BY username ORDER BY t1.event_timestamp) AS previous_timestamp,
    t1.event_timestamp AS current_timestamp,
    t1.event_timestamp - LAG(t1.event_timestamp) OVER (PARTITION BY username ORDER BY t1.event_timestamp) AS time_delta,
    username AS username
FROM 
    (SELECT MAX(event_date) AS event_date, MAX(event_timestamp) AS event_timestamp, event_name, user_properties.value.string_value AS username
        FROM `analytics_153084895.events_*`,
        UNNEST(user_properties) AS user_properties
        GROUP BY event_name, event_server_timestamp_offset, username
        ) AS t1
WHERE
    username = 'Shad Christie' AND 
    (t1.event_name = "risky_sent_sms_warning_event" OR event_name = "risky_sms_event")
LIMIT 100
"""
df = client.query(query).to_dataframe()
df.head(10)

Unnamed: 0,date,event_name,previous_timestamp,current_timestamp,time_delta,username


### Unique Device Models

In [31]:
query = """
SELECT
    COUNT(device.mobile_os_hardware_model),
    device.mobile_os_hardware_model AS model
FROM
    `heycharlie-ada47.analytics_153084895.events_*`
WHERE
    event_date > '20181109'
GROUP BY
    device.mobile_os_hardware_model
ORDER BY
    COUNT(device.mobile_os_hardware_model) DESC
"""
df = client.query(query).to_dataframe()
df.head(10)

Unnamed: 0,f0_,model
0,51772,SM-N920V
1,20830,SAMSUNG-SM-G891A
2,20583,Pixel XL
3,16159,SM-G950U
4,15949,SM-G930V
5,13467,Pixel
6,8870,A577VL
7,8678,SM-G930P
8,8054,Pixel 3 XL
9,7917,Pixel 2


### Weekly Questionnaire Instances

TODO It looks like the event_name may have changed...

In [33]:
query = """
SELECT
    event_date AS Date,
    COUNT(event_name) AS event
FROM
    `analytics_153084895.events_*`,
    UNNEST(event_params) AS event_params
WHERE
    event_name = "weekly_questionnaire_notification" AND key = "notification_event"
GROUP BY
    event_date
ORDER BY
    event_date DESC
"""
df = client.query(query).to_dataframe()
df.head(10)

Unnamed: 0,Date,event
0,20180911,1
1,20180906,5
2,20180905,1
3,20180828,3
4,20180827,1
5,20180820,1
6,20180814,1
7,20180813,2
8,20180811,1
9,20180808,1
