In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt

%matplotlib inline

# Parameters

In [None]:
# Relative path of the enriched sample CSV that the notebook loads with
# pd.read_csv further down.
input_file_name = "../../data/enriched/sample_2018.csv"

# Load data

In [None]:
%%time

# Country polygons from the 'naturalearth_lowres' dataset that ships with
# GeoPandas; used as the base map and as the right-hand side of the spatial join.
# NOTE(review): `gpd.datasets` was deprecated in GeoPandas 0.14 and removed in
# 1.0 -- confirm the pinned geopandas version before re-running.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

In [None]:
def transform_to_geo_data(df: pd.DataFrame) -> gpd.GeoDataFrame:
    """Turn a table of E7-scaled coordinates into a GeoDataFrame of points.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide ``longitudeE7`` and ``latitudeE7`` columns holding
        coordinates multiplied by 10**7.

    Returns
    -------
    gpd.GeoDataFrame
        The columns of ``df`` plus a ``geometry`` column with one point per
        row (x = longitude, y = latitude), tagged as EPSG:4326.

    Raises
    ------
    AttributeError
        If ``df`` lacks the ``longitudeE7`` or ``latitudeE7`` column.
    """
    e7_scale = 10_000_000

    return gpd.GeoDataFrame(
        df,
        # Builds the whole point column in one call instead of constructing
        # shapely Point objects one row at a time.
        geometry=gpd.points_from_xy(
            df.longitudeE7 / e7_scale,
            df.latitudeE7 / e7_scale,
        ),
        # Authority-string form of the CRS, set at construction time. The
        # older {"init": "epsg:4326"} dict is deprecated and makes recent
        # pyproj/GeoPandas emit a FutureWarning.
        crs="EPSG:4326",
    )

In [None]:
%%time

# Read the CSV named in the parameters cell and hand the resulting frame
# straight to the point-geometry conversion.
gdf = pd.read_csv(input_file_name).pipe(transform_to_geo_data)

# Show GPS data against the world map

In [None]:
%%time

# Base layer: country outlines in grey. The points are drawn on the same axes
# as red dots.
base_ax = world.plot(figsize=(10, 6), facecolor='lightgray', edgecolor='gray')
gdf.plot(ax=base_ax, color='red', marker='o', markersize=15)

# Per-row bounding boxes of the point geometries; their extremes frame the view.
bounds = gdf.geometry.bounds

# Zoom to the extent of the data, padded by 5 coordinate units on every side.
plt.xlim(bounds['minx'].min() - 5, bounds['maxx'].max() + 5)
plt.ylim(bounds['miny'].min() - 5, bounds['maxy'].max() + 5)

# Show time spent in each country

In [None]:
%%time

# Match every point with the country polygon it intersects, then total the
# `duration` column per country name. The inner join drops points that fall
# inside no polygon.
# The spatial predicate is deliberately left at sjoin's default ("intersects"):
# the keyword that selects it was renamed from `op` to `predicate` in
# GeoPandas 0.10 and `op` was later removed, so omitting it keeps this cell
# working on both sides of that rename.
country_summary = (
    gpd
    .sjoin(gdf, world, how='inner')
    .groupby('name')
    .agg({'duration': 'sum'})
)

tot_duration = country_summary['duration'].sum()

# Each country's share of the total joined duration, in percent. Plain column
# arithmetic replaces the row-wise apply and also copes with an empty summary.
country_summary['%'] = country_summary['duration'] / tot_duration * 100

country_summary.sort_values('duration', ascending=True)