In [None]:
# auto format code at cell execution
%load_ext lab_black

# Relative path to the project root (the directory that holds `src/` and
# `data/`, judging by how it is used in this notebook).
root_location = "../../"
import sys

# Put the project root on the import path so that the `src.tools` import
# below can be resolved from this notebook's own directory.
sys.path.append(root_location)

import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

from src.tools.pre_processing import transform_to_geo_data

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Parameters

In [None]:
# CSV sample to analyse, addressed relative to `root_location`.
input_file_name = f"{root_location}data/samples/year_2018.csv"

# Load data

In [None]:
%%time

world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

In [None]:
%%time

gdf = transform_to_geo_data(pd.read_csv(input_file_name))

# Show GPS data against the world map

In [None]:
%%time

gdf.plot(
    ax=world.plot(facecolor="lightgray", edgecolor="gray", figsize=(10, 6)),
    marker="o",
    color="red",
    markersize=15,
)

bounds = gdf.geometry.bounds

plt.xlim([bounds.minx.min() - 5, bounds.maxx.max() + 5])
plt.ylim([bounds.miny.min() - 5, bounds.maxy.max() + 5])

# Show time spent in each country

In [None]:
%%time

country_summary = (
    gpd.sjoin(gdf, world, how="inner", op="intersects")
    .groupby("name")
    .agg({"duration": "sum"})
)

tot_duration = country_summary.duration.sum()

country_summary["%"] = country_summary.apply(
    lambda row: row.duration / tot_duration * 100, axis=1
)

country_summary.sort_values("duration", ascending=False)