In [None]:
import json
import pandas as pd
import math

%matplotlib inline

In [None]:
# Read the location export with json.load().
# The encoding is passed explicitly: JSON text is UTF-8, whereas open()
# without an encoding falls back to the platform's locale encoding.
file = './data/GoogleLocation.json'
with open(file, encoding='utf-8') as train_file:
    data_json = json.load(train_file)

# One row per entry of the top-level 'locations' list; the 'raw' column keeps
# each entry unchanged so later cells can pull individual fields out of it.
df = pd.DataFrame({'raw': data_json['locations']})

In [None]:
# Extract the base data.
# Each field is pulled out of the 'raw' column of record dicts with Series.map
# rather than a row-wise DataFrame.apply(axis=1): it avoids building a Series
# for every row and still yields a column when the frame is empty.
df['timestampMs'] = df['raw'].map(lambda rec: int(rec['timestampMs']))  # cast to int before use as epoch ms
df['latitudeE7'] = df['raw'].map(lambda rec: rec['latitudeE7'])
df['longitudeE7'] = df['raw'].map(lambda rec: rec['longitudeE7'])
df['accuracy'] = df['raw'].map(lambda rec: rec['accuracy'])

# Derivatives
# timestampMs is interpreted as milliseconds since the epoch; the calendar
# parts are read through the vectorised .dt accessor.
df['date'] = pd.to_datetime(df['timestampMs'], unit='ms')
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['hour'] = df['date'].dt.hour

In [None]:
# Working subset: fixes with accuracy below 100 recorded on 2018-07-17.
# NOTE(review): year/month/day come from pd.to_datetime(..., unit='ms'), so the
# day boundary is presumably UTC rather than local time -- confirm.
# .copy() gives df_part its own data, so the columns added to it in later
# cells do not touch df.
df_part = df.loc[
    (df['accuracy'] < 100)
    & (df['year'] == 2018)
    & (df['month'] == 7)
    & (df['day'] == 17)
].copy()

In [None]:
# Hourly summary over all rows of df: median / variance / count of latitudeE7
# and variance of longitudeE7 for every (year, month, day, hour) bucket.
# The result has two-level columns, e.g. ('latitudeE7', 'var').
grouped = df.groupby(['year','month','day','hour']).agg({'latitudeE7': ['median', 'var', 'count'], 'longitudeE7': 'var'})

# +epsilon for stability: keeps the logarithm defined when the variance is 0.
# The log is taken over the variance column as a whole instead of a row-wise
# apply(axis=1), which is cheaper and also works when `grouped` is empty.
# NaN variances pass through math.log unchanged as NaN.
grouped['lat_var_log'] = (grouped[('latitudeE7', 'var')] + 0.0001).map(math.log)

grouped

In [None]:
# Histogram of the median latitude for the hourly buckets that contain
# exactly one row (count == 1).
(
    grouped[('latitudeE7', 'median')]
    [grouped[('latitudeE7', 'count')] == 1]
    .hist(bins=1000)
)

In [None]:
# Distribution of the log latitude variance across the hourly buckets.
grouped.hist(
    column=['lat_var_log'],
    bins=100,
    figsize=(16, 10),
)

In [None]:
# Row-to-row displacement in the raw E7 coordinate columns.
df_part['dx'] = df_part['longitudeE7'].diff()
df_part['dy'] = df_part['latitudeE7'].diff()

# Dot product of each displacement with the one before it, then a centred
# 5-row rolling mean of that product.
df_part['dot_prod'] = (
    df_part['dx'].mul(df_part['dx'].shift())
    .add(df_part['dy'].mul(df_part['dy'].shift()))
)
df_part['movement'] = df_part['dot_prod'].rolling(window=5, center=True).mean()

df_part.head(n=60)

In [None]:
# Smoothed movement score against the millisecond timestamp.
df_part.plot.line(x='timestampMs', y='movement')

In [None]:
# Variance of latitudeE7 over a centred 10-row rolling window.
lat_window = df_part['latitudeE7'].rolling(window=10, center=True)
df_part['gps_var'] = lat_window.var()

df_part.plot.line(x='timestampMs', y='gps_var')

In [None]:
# Raw latitudeE7 against the millisecond timestamp for the selected subset.
df_part.plot(kind='line', x='timestampMs', y='latitudeE7')

In [None]:
# Raw longitudeE7 against the millisecond timestamp for the selected subset.
df_part.plot(kind='line', x='timestampMs', y='longitudeE7')