In [None]:
import utils

import geopandas as gpd
from shapely.geometry import Point

import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Load the low-resolution Natural Earth dataset bundled with geopandas; it is
# used as the basemap and as the polygon layer for later spatial operations.
naturalearth_path = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(naturalearth_path)

# Explicitly tag the layer as EPSG:4326 (overrides whatever CRS was read).
world.crs = {'init': 'epsg:4326'}

# Quick visual sanity check of the loaded polygons.
world.plot(cmap='Set3', figsize=(10, 6))

In [None]:
%time
df = utils.load_data("../data/LocationHistory2018.json")

In [None]:
# The E7 columns hold coordinates scaled by 1e7; divide to get plain values.
longitudes = df.longitudeE7 / 10_000_000
latitudes = df.latitudeE7 / 10_000_000

# One shapely Point per row, in (x=longitude, y=latitude) order.
point_geometries = [Point(lon, lat) for lon, lat in zip(longitudes, latitudes)]

gdf = gpd.GeoDataFrame(df, geometry=point_geometries)

# Setting the CRS by hand (the points carry no projection information of
# their own): http://geopandas.org/projections.html
gdf.crs = {'init': 'epsg:4326'}

In [None]:
# Draw the world polygons as a neutral backdrop, then overlay the recorded
# points in red on the same axes.
base_ax = world.plot(facecolor='lightgray', edgecolor='gray', figsize=(10, 6))
gdf.plot(ax=base_ax, marker='o', color='red', markersize=15)

# Zoom to the extent of the points, padded by 5 coordinate units per side.
bounds = gdf.geometry.bounds
x_range = [bounds.minx.min() - 5, bounds.maxx.max() + 5]
y_range = [bounds.miny.min() - 5, bounds.maxy.max() + 5]

plt.xlim(x_range)
plt.ylim(y_range)

In [None]:
# Attribute every point in `gdf` to the `world` polygon it intersects, then
# total the `duration` column per country name.
country_summary = (
    gpd
    .sjoin(gdf, world, how='inner', op='intersects')
    .groupby('name').agg({'duration': 'sum'})
    # Kept from the original: coerce the index labels to str.
    .rename(index=str)
    # After the groupby, 'name' is the index label rather than a column, so
    # the original `rename(columns={'name': 'country'})` changed nothing.
    # `rename_axis` is what actually relabels the index as 'country'.
    .rename_axis('country')
)

tot_duration = country_summary.duration.sum()

# Share of the total duration per country, computed column-wise instead of
# with a row-by-row `apply`.
country_summary['%'] = country_summary.duration / tot_duration * 100

country_summary.sort_values('duration', ascending=False)