In [None]:
import json
import pandas as pd
import math
import datetime

%matplotlib inline

In [None]:
# Load the exported location history with json.load(). The file is opened
# as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
file = './data/GoogleLocation.json'
with open(file, encoding='utf-8') as train_file:
    data_json = json.load(train_file)

# One row per entry of the top-level 'locations' list; each untouched record
# is stored in the 'raw' column.
df = pd.DataFrame({'raw': data_json['locations']})

In [None]:
# Extract the base data.
# Each field is read from the 'raw' record with an element-wise Series.map
# instead of DataFrame.apply(axis=1): no per-row Series has to be built and
# an empty frame is handled as well. A record that lacks one of these keys
# still raises KeyError.
df['timestampMs'] = df['raw'].map(lambda record: int(record['timestampMs']))
df['latitudeE7'] = df['raw'].map(lambda record: record['latitudeE7'])
df['longitudeE7'] = df['raw'].map(lambda record: record['longitudeE7'])
df['accuracy'] = df['raw'].map(lambda record: record['accuracy'])

In [None]:
# Derivatives
# NOTE(review): the timestamps are converted without a timezone, so the
# calendar fields and time_of_day below presumably follow UTC rather than
# local time -- confirm that this is intended.
MS_PER_HOUR = 1000*60*60
MS_PER_DAY = 24*MS_PER_HOUR

df['date'] = pd.to_datetime(df['timestampMs'], unit='ms')
# The .dt accessor reads the calendar fields column-wise instead of calling
# a Python lambda once per row.
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day

# Fractional hours elapsed since the start of the day.
df['time_of_day'] = (df['timestampMs'] % MS_PER_DAY) / MS_PER_HOUR
# Millisecond difference to the previous row with the sign flipped, i.e.
# positive when timestamps decrease from one row to the next. The first row
# has no predecessor and is NaN.
df['duration'] = df['timestampMs'].diff()*-1

In [None]:
# Preview the first rows of df as a quick sanity check.
df.head()

In [None]:
# Keep only the rows from 17 July 2018 whose 'accuracy' value is below 100.
# .copy() gives an independent frame, so the column assignments below never
# write into (or warn about) a slice of df.
df_part = df[(df['accuracy'] < 100) & (df['year']==2018) & (df['month']==7) & (df['day']==17)].copy()
# Recompute the gap between consecutive rows of the filtered selection
# (sign flipped; NaN for the first row).
df_part['duration'] = df_part['timestampMs'].diff()*-1
# Element-wise square root, used as the marker size below. math.sqrt keeps
# NaN as NaN and still raises ValueError on a negative gap.
df_part['sqrt_duration'] = df_part['duration'].map(math.sqrt)

# Longitude against latitude, coloured by time of day, sized by sqrt(duration),
# with horizontal error bars scaled from 'accuracy'.
df_part.plot.scatter(x='longitudeE7', xerr=df_part['accuracy']*200, y='latitudeE7', c='time_of_day', s=df_part['sqrt_duration'], figsize=(16, 10), colormap='viridis')

In [None]:
# Inspect the 'activity' payload of one record (index label 18505).
# It is read from the 'raw' column, so this cell runs on a fresh kernel:
# the 'activity_raw' column is only created in a later cell.
df_part.loc[18505, 'raw'].get('activity')

In [None]:
# Value stored under each record's 'activity' key; None when the key is absent.
# Element-wise Series.map replaces the row-wise DataFrame.apply(axis=1).
df_part['activity_raw'] = df_part['raw'].map(lambda record: record.get('activity'))

def determine_activity_type(activity, min_confidence=30):
    """Pick a single activity label for one activity reading.

    The first option whose type is not 'UNKNOWN' decides the label.
    NOTE(review): this presumably relies on the options being ordered by
    descending confidence -- confirm against the export format.

    Parameters
    ----------
    activity : dict
        One reading; its 'activity' key holds a list of option dicts, each
        with a 'type' and a 'confidence' entry.
    min_confidence : number, optional
        The chosen option must have a confidence strictly greater than this
        value to be reported without a trailing '?'. Defaults to 30.

    Returns
    -------
    str
        The option's type, with '?' appended when its confidence is not
        above ``min_confidence``; just '?' when the list is empty or holds
        only 'UNKNOWN' options.

    Raises
    ------
    KeyError
        If ``activity`` has no 'activity' key, or the chosen option lacks
        'type' or 'confidence'.
    """
    # The list of all options is an array under the key 'activity'
    options = activity['activity']

    # Skip the UNKNOWN options. The default of None covers an empty list and
    # a list of nothing but UNKNOWN, where an index-based scan would run off
    # the end of the list.
    chosen = next((option for option in options if option['type'] != 'UNKNOWN'), None)
    if chosen is None:
        return '?'

    # High confidence: report the type as is; otherwise flag it as uncertain.
    if chosen['confidence'] > min_confidence:
        return chosen['type']
    return chosen['type'] + '?'

def gather_activities(activities_raw):
    """Collapse one record's activity readings into a sorted list of labels.

    Parameters
    ----------
    activities_raw : iterable or None
        The readings of one record; every element is passed to
        determine_activity_type. None stands for "no readings".

    Returns
    -------
    list
        The distinct labels in ascending order, with 'TILTING' left out.
        Empty when ``activities_raw`` is None or has no elements.
    """
    # Identity check: `== None` would dispatch to the operand's __eq__.
    if activities_raw is None:
        return []

    # A set keeps each label once without rescanning a list per reading.
    labels = {determine_activity_type(activity) for activity in activities_raw}

    # ignore TILTING activities
    # NOTE(review): only the exact label 'TILTING' is dropped; a
    # low-confidence 'TILTING?' is kept -- confirm that this is intended.
    labels.discard('TILTING')

    return sorted(labels)

# Reduce each record's raw readings to a sorted list of distinct labels.
# Series.map hands every cell (including None) straight to gather_activities,
# replacing the row-wise DataFrame.apply(axis=1).
df_part['activity'] = df_part['activity_raw'].map(gather_activities)

# Label combinations that should be hidden from the listing below.
known = [[]]

# Show up to 60 rows whose label combination is not listed in `known`.
df_part[df_part['activity'].apply(lambda a: a not in known)].head(60)

In [None]:
def plot_single_day(year, month, day, max_accuracy=100):
    """Scatter-plot the rows of the module-level ``df`` for one calendar day.

    Longitude is drawn against latitude, coloured by 'time_of_day', sized by
    the square root of the gap between consecutive selected rows, with
    horizontal error bars scaled from 'accuracy'.

    Parameters
    ----------
    year, month, day : int
        Date to select, compared against the 'year', 'month' and 'day'
        columns of ``df``.
    max_accuracy : number, optional
        Only rows whose 'accuracy' is strictly below this value are plotted.
        Defaults to 100.

    Returns
    -------
    None
        When no row matches, a message is printed and nothing is drawn.

    Raises
    ------
    ValueError
        From math.sqrt, if a gap between consecutive selected rows is
        negative after the sign flip.
    """
    label = '%d-%02d-%02d' % (year, month, day)

    selected = (df['accuracy'] < max_accuracy) & (df['year']==year) & (df['month']==month) & (df['day']==day)
    # .copy() gives an independent frame, so the assignments below never
    # write into a slice of df.
    day_fixes = df[selected].copy()

    # A day without matching rows would otherwise abort a loop over several days.
    if day_fixes.empty:
        print('No rows to plot for ' + label)
        return

    # Gap to the previous selected row with the sign flipped (NaN for the first row).
    day_fixes['duration'] = day_fixes['timestampMs'].diff()*-1
    # Element-wise square root; math.sqrt keeps NaN as NaN.
    day_fixes['sqrt_duration'] = day_fixes['duration'].map(math.sqrt)

    ax = day_fixes.plot.scatter(x='longitudeE7', xerr=day_fixes['accuracy']*200, y='latitudeE7', c='time_of_day', s=day_fixes['sqrt_duration'], figsize=(16, 10), colormap='viridis')
    # Title each figure so plots produced in a loop can be told apart.
    ax.set_title(label)

# Draw one figure per day for 1-6 July 2018 (the range end is exclusive).
for day_of_month in range(1, 7):
    plot_single_day(2018, 7, day_of_month)