In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  
from mpl_toolkits.basemap import Basemap  

#%matplotlib inline

plt.rcParams['text.usetex'] = True
plt.rcParams['figure.figsize'] = [10, 8]
plt.rcParams['font.size'] = 16

In [None]:
 # Create a dataframe from the json file in the filepath
raw = pd.io.json.read_json('LocationHistory.json') 
df = raw['locations'].apply(pd.Series) 

In [None]:
df['latitude'] = df['latitudeE7'] * 1e-7
df['longitude'] = df['longitudeE7'] * 1e-7

In [24]:
df.columns

Index([u'accuracy', u'activitys', u'altitude', u'heading', u'latitudeE7', u'longitudeE7', u'timestampMs', u'velocity', u'latitude', u'longitude', u'DistanceFromGreenwhich', u'Deltat'], dtype='object')

In [28]:
df.drop(['activitys', 'altitude', 'heading', 'latitudeE7', 'longitudeE7', 'velocity'] , axis=1, inplace=True)

In [2]:
def Haversine(theta):
    return np.sin(theta/2.0)**2

def DistanceFromGreenwhich(lat, lon):
    R = 6.371e6 # m
    latG, lonG = 51.48, 0.00 # Grenwhich lat and long
    latG = np.radians(latG)
    lonG = np.radians(lonG)
    lat = np.radians(lat)
    lon = np.radians(lon)
    arg = Haversine(lat - latG) + np.cos(latG)*np.cos(lat)*Haversine(lon - lonG)
    return 2 * R * np.arcsin(np.sqrt(arg))

df['DistanceFromGreenwhich'] = DistanceFromGreenwhich(df.latitude, df.longitude)

df_home = df[df.DistanceFromGreenwhich < 300e3]

In [10]:
def PaddingFunction(xL, xR, frac=0.1):
    """ Return xL and xR with an added padding factor of frac either side """
    xRange = xR - xL
    xL_new = xL - frac*xRange
    xR_new = xR + frac*xRange
    return xL_new, xR_new

def GeneratePlot(data, fig=None, ignore_first=False, *args, **kwargs):
    """ Helper function to plot points on a map
    
    Parameters
    ----------
    ignore_first : bool, 
        If true the data in the first df in data is ignored and used only to set 
        up the map 
    """
    if type(data) == pd.core.frame.DataFrame:
        # Single df
        df = data
        df_list = [df]
    elif type(data) == list:
        df_list = data
        df = data[0]

    
    if not fig:
        fig = plt.figure()

    # Calculate some parameters which will be resused]
    lat_0 = df.latitude.mean()
    lon_0 = df.longitude.mean()
    llcrnrlon, urcrnrlon = PaddingFunction(df.longitude.min(), df.longitude.max(), frac=0.3)
    llcrnrlat, urcrnrlat = PaddingFunction(df.latitude.min(), df.latitude.max())

    # Create a map, using the Gall–Peters projection, 
    m = Basemap(projection='gall',  
                  resolution = 'h', 
                  area_thresh = 10000.0,
                  lat_0=lat_0, lon_0=lon_0,
                  llcrnrlon=llcrnrlon,
                  urcrnrlon=urcrnrlon,
                  llcrnrlat=llcrnrlat, 
                  urcrnrlat=urcrnrlat,
                  ax=fig.gca()
                  )

    m.drawcoastlines()
    m.drawcountries()
    m.fillcontinents(color = '#996633')
    m.drawmapboundary(fill_color='#0099FF')

    if ignore_first:
        df_list = df_list[1:]
        
    for df in df_list:
        # Define our longitude and latitude points
        x, y = m(df['longitude'].values, df['latitude'].values)

        # Plot them using round markers of size 6 
        m.plot(x, y, "o", zorder=100, *args, **kwargs)

    return fig

In [7]:
fig = GeneratePlot(df_home, color="r")
plt.show()

### Get only connected data frames

The times for which data is recorded depends on many external factors, cheif amonst these is if I have the gps turned on, or if the phone tries to find its location via wifi. 

In [12]:
fig = GeneratePlot(df_home[df_home.accuracy < 50], color="r")
plt.show()

In [35]:
df_home['Deltat'] = np.concatenate(([0], np.diff(df_home.timestampMs.values.astype(np.float64))))
df_home

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,accuracy,activitys,altitude,heading,latitudeE7,longitudeE7,timestampMs,velocity,latitude,longitude,DistanceFromGreenwhich,Deltat
0,39,,,,509362003,-13984835,1424078580622,,50.936200,-1.398483,114659.466626,0
1,1200,,,,509014593,-13732129,1424078296716,,50.901459,-1.373213,115307.717875,-283906
2,1200,"[{u'activities': [{u'confidence': 100, u'type'...",,,509014593,-13732129,1424078232809,,50.901459,-1.373213,115307.717875,-63907
3,37,,,,509362143,-13984699,1424078170566,,50.936214,-1.398470,114657.828263,-62243
4,45,,,,509362226,-13984751,1424078109921,,50.936223,-1.398475,114657.641908,-60645
5,37,"[{u'activities': [{u'confidence': 100, u'type'...",,,509362341,-13985141,1424078038232,,50.936234,-1.398514,114659.265531,-71689
6,31,,,,509362803,-13985563,1424077978014,,50.936280,-1.398556,114659.013053,-60218
7,36,,,,509362485,-13985295,1424077917093,,50.936248,-1.398529,114659.319781,-60921
8,1200,"[{u'activities': [{u'confidence': 100, u'type'...",,,509014593,-13732129,1424077854200,,50.901459,-1.373213,115307.717875,-62893
9,1200,,,,509014593,-13732129,1424077792960,,50.901459,-1.373213,115307.717875,-61240


In [40]:
plt.hist(df_home.Deltat[df_home.Deltat > -1e5], bins=50, log=True)
plt.show()

In [31]:
df[df.Deltat < -1e10]

Unnamed: 0,accuracy,timestampMs,latitude,longitude,DistanceFromGreenwhich,Deltat
450938,1974,1345968005171,50.738327,-1.717338,145511.381166,-29886687683
