In [None]:
import os
import numpy as np
import pandas as pd

import geopy
import plotly_express as px
import plotly.graph_objects as go
from geopy.geocoders import Nominatim, GoogleV3

import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Load the simulated user table from the local `dataset` folder and preview it.
users_csv_path = os.path.join('dataset', 'user_simulated.csv')
df_users = pd.read_csv(users_csv_path)
df_users.head(n=3)

In [None]:
# Load the simulated GPS log and convert its `timestamp` column to datetimes.
# NOTE(review): the format string has no ':' between %M and %S. If the CSV
# stores times as "H:MM" (no seconds), the minute digits may be split between
# minutes and seconds without raising -- confirm against the raw file.
gps_csv_path = os.path.join('dataset', 'gps_log_simulated.csv')
df_gps = pd.read_csv(gps_csv_path).assign(
    timestamp=lambda gps: pd.to_datetime(gps['timestamp'], format='%m/%d/%Y %H:%M%S')
)
df_gps.head(n=3)

In [None]:
# Attach every GPS record to its user's profile; the inner join keeps only
# user_ids that appear in both tables.
df = df_users.merge(right=df_gps, on='user_id', how='inner')
df.info()

In [None]:
# Calendar features used for filtering and as animation frames below.
# The vectorised `.dt` accessor replaces a row-by-row `.apply(lambda ...)`.
df['day'] = df['timestamp'].dt.day
df['hour'] = df['timestamp'].dt.hour

# Age bin
# `right=False` makes every bin left-closed ([0, 10), [10, 20), ...) so the
# intervals agree with the labels: age 10 falls in '10-19', not '0-9', and
# age 0 is no longer dropped. The last edge is open-ended so that ages above
# 70 land in '60 and older' instead of becoming NaN.
bins = [0, 10, 20, 30, 40, 50, 60, np.inf]
labels = ['0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60 and older']
df['age_group'] = pd.cut(df['age'], bins=bins, labels=labels, right=False)

In [None]:
# Order the merged records chronologically within each user, then preview.
sort_keys = ['user_id', 'timestamp']
df = df.sort_values(sort_keys)
df.head(5)

In [None]:
# Map of every user's home location.
# The Mapbox token is a credential, so it is read from the environment rather
# than being committed with the notebook. Export MAPBOX_ACCESS_TOKEN before
# starting the kernel.
mapbox_token = os.environ.get('MAPBOX_ACCESS_TOKEN')
if not mapbox_token:
    raise RuntimeError(
        'Set the MAPBOX_ACCESS_TOKEN environment variable to a valid Mapbox '
        'access token before running this notebook.'
    )
px.set_mapbox_access_token(mapbox_token)

# `df` has one row per GPS record, so the same home marker would otherwise be
# drawn once per record. Rows that are identical in every plotted field are
# collapsed first; the rendered map is the same, with far fewer points.
homes = df.drop_duplicates(
    subset=['user_id', 'name', 'home_latitude', 'home_longitude']
)
px.scatter_mapbox(
    homes,
    lat='home_latitude',
    lon='home_longitude',
    hover_name='name',
    zoom=8,
    title="User's Home Coordinates"
)

In [None]:
# Last recorded position of every user on April 3, 2020.
# Rows are selected by day-of-month only; the plot titles in this notebook
# indicate the log covers April 2-8, 2020 -- TODO confirm it spans one month.
#
# `tail(1)` takes the actual last row of each user. `GroupBy.last()` instead
# returns the last non-null value of each column independently, which can
# combine values from different rows when a record has missing fields.
subset = (
    df[df['day'] == 3]
    .sort_values(by=['user_id', 'timestamp'])
    .groupby('user_id')
    .tail(1)
    .reset_index(drop=True)
)

px.scatter_mapbox(
    subset,
    lat='latitude',
    lon='longitude',
    hover_name='name',
    zoom=8,
    title='Last recorded positions of users on April 3, 2020'
)

In [None]:
# Animated map of all GPS records: one frame per day, markers coloured by sex.
px.scatter_mapbox(
    data_frame=df,
    title='How do users travel in this week (April 2, 2020 to April 8, 2020)',
    lon='longitude',
    lat='latitude',
    color='sex',
    animation_frame='day',
    hover_name='name',
    zoom=8,
)

In [None]:
# Animated map of all GPS records: one frame per day, markers coloured by the
# `age_group` column.
px.scatter_mapbox(
    data_frame=df,
    title='How do users travel in this week (April 2, 2020 to April 8, 2020)',
    lon='longitude',
    lat='latitude',
    color='age_group',
    animation_frame='day',
    hover_name='name',
    zoom=8,
)

In [None]:
# How user_id 100 moved around the city in this week
# The filter selects user 100 so that it agrees with this comment, the
# variable name and the plot title.
# NOTE(review): confirm user 100 is the intended user and exists in the data.
user_100 = df[df['user_id'] == 100].sort_values(by=['day', 'hour'])

# The home columns repeat on every GPS row of the merged frame. Keeping the
# distinct combinations draws the home marker and its label once instead of
# once per row (and yields an empty trace if the user has no rows).
home = user_100[['name', 'home_longitude', 'home_latitude']].drop_duplicates()
home_lon = home['home_longitude'].values
home_lat = home['home_latitude'].values
# Explicit column indexing: `frame.name` only works because attribute access
# falls back to the column of that name.
home_label = ('Home of ' + home['name']).values

# One animation frame per day of the week.
fig = px.scatter_mapbox(
    user_100,
    lat='latitude',
    lon='longitude',
    hover_name='name',
    animation_frame='day',
    zoom=8,
    title='How user_id 100 moved around the city (April 2, 2020 to April 8, 2020)',
)
# Overlay the home location as a large labelled marker; `add_trace` returns
# the figure, so it is displayed as the cell output.
fig.add_trace(go.Scattermapbox(
    mode='markers+text',
    lon=home_lon,
    lat=home_lat,
    marker={'size': 20},
    showlegend=False,
    text=home_label,
    textposition='bottom right',
))


In [None]:
# How user_id 100 moved around the city on April 5, 2020, hour by hour.
# The filter selects user 100 so that it agrees with the variable name and
# the plot title.
# NOTE(review): confirm user 100 is the intended user and exists in the data.
user_100 = df[(df['user_id'] == 100) & (df['day'] == 5)].sort_values(by=['day', 'hour'])

# The home columns repeat on every GPS row of the merged frame. Keeping the
# distinct combinations draws the home marker and its label once instead of
# once per row (and yields an empty trace if the user has no rows).
home = user_100[['name', 'home_longitude', 'home_latitude']].drop_duplicates()
home_lon = home['home_longitude'].values
home_lat = home['home_latitude'].values
# Explicit column indexing: `frame.name` only works because attribute access
# falls back to the column of that name.
home_label = ('Home of ' + home['name']).values

# One animation frame per hour of the selected day.
fig = px.scatter_mapbox(
    user_100,
    lat='latitude',
    lon='longitude',
    hover_name='name',
    animation_frame='hour',
    zoom=8,
    title='How user_id 100 moved around the city on April 5, 2020',
)
# Overlay the home location as a large labelled marker; `add_trace` returns
# the figure, so it is displayed as the cell output.
fig.add_trace(go.Scattermapbox(
    mode='markers+text',
    lon=home_lon,
    lat=home_lat,
    marker={'size': 20},
    showlegend=False,
    text=home_label,
    textposition='bottom right',
))