# Comparison of data


by soniame@iadb.org

- **Google COVID-19 Mobility Report**

[Google link](https://www.google.com/covid19/mobility/)

Baseline: median value for the corresponding day of week during a 5 week period from January 3rd, 2020 to February 6th, 2020. 

- **Waze driven kilometers**: percent change in driven kilometers relative to the baseline.

[Waze link](https://www.waze.com/covid19)

Baseline: average value for the corresponding day of week during a 2 week period from February 11th, 2020 to February 25th, 2020.

In [85]:
# %load first_cell.py
%reload_ext autoreload
%autoreload 2
from pathlib import Path
home = str(Path.home())

import sys
# Root of the conda environment whose packages this notebook relies on.
env_root = f'{home}/.conda/envs/norm_env'
env_paths = [f'{env_root}/lib/python37.zip',
             f'{env_root}/lib/python3.7',
             f'{env_root}/lib/python3.7/lib-dynload',
             f'{env_root}/lib/python3.7/site-packages']
# Add only the entries that are still missing, so re-running this cell does
# not keep growing sys.path with duplicates.
sys.path = sys.path + [p for p in env_paths if p not in sys.path]
# Derive the prefix from the current user's home (same root as the sys.path
# entries above) instead of a hard-coded '/home/soniame' directory.
sys.prefix = env_root

from paths import RAW_PATH, TREAT_PATH, OUTPUT_PATH, FIGURES_PATH

from copy import deepcopy
import numpy as np
import pandas as pd
pd.options.display.max_columns = 999
import yaml
import matplotlib.pyplot as plt 
import datetime

import warnings
warnings.filterwarnings('ignore')

# Plotting
import plotly
import plotly.graph_objs as go
import cufflinks as cf
plotly.offline.init_notebook_mode(connected=True)

def iplottitle(title, width=40):
    """Wrap a plot title for plotly by inserting HTML line breaks.

    Parameters
    ----------
    title : str
        Title text to wrap.
    width : int, default 40
        Maximum number of characters per line, as passed to `textwrap.wrap`.

    Returns
    -------
    str
        The wrapped lines joined with '<br>'.
    """
    wrapped_lines = textwrap.wrap(title, width)
    line_break = '<br>'
    return line_break.join(wrapped_lines)
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

import plotnine as p9
from mizani.breaks import date_breaks
from mizani.formatters import date_format
p9.theme_set(p9.theme_linedraw()) # default theme

# Setting cufflinks
import textwrap
import cufflinks as cf
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)
import yaml
from jinja2 import Template
# Register the project's cufflinks theme under the name 'custom'.
# safe_load is used because yaml.load() without an explicit Loader is
# deprecated (and raises TypeError on PyYAML >= 6); the `with` block makes
# sure the template file handle is closed.
# NOTE(review): assumes the template only contains plain YAML (no Python-specific tags) - confirm.
with open('cufflinks_template.yaml', 'r') as theme_file:
    cf.themes.THEMES['custom'] = yaml.safe_load(theme_file)

# Manipulation 
from siuba import *

In [86]:
from src import utils
# NOTE: this rebinds the name `datetime` from the module (imported in the
# setup cell) to the `datetime.datetime` class for the rest of the notebook.
from datetime import datetime

# Open the Athena connection used by the SQL queries below.
athena_config = '../configs/athena.yaml'
conn = utils.connect_athena(path=athena_config)

In [91]:
# Distinct ISO country codes present in the Waze daily index table.
iso_codes_sql = """
select 
    distinct country_iso_code
from spd_sdv_waze_corona.prod_daily_daily_index
"""
iso_codes = pd.read_sql_query(iso_codes_sql, conn)

In [88]:
# Number of country codes returned by the query above.
len(iso_codes['country_iso_code'])

20

## Load data

Loading data for Latin American countries. 

### Waze Dashboard

In [96]:
# Daily index rows whose region_slug contains 'country', with a `date`
# column built in SQL from the year / month / day columns.
dash_sql = """
select 
    *,
    (date_parse(concat(cast(year as varchar), ' ', cast(month as varchar), ' ', cast(day as varchar)), '%Y %m %e')) as date
from spd_sdv_waze_corona.prod_daily_daily_index
where region_slug like '%country%'
"""
df_dash = pd.read_sql_query(dash_sql, conn)

In [98]:
# Date coverage (first day, last day, number of observations) per country
# in the Waze dashboard data.
dash_grouping = group_by('country_iso_code', 'country_name', 'region_slug')
dash_coverage = summarize(
    min_date=_.date.min(),
    max_date=_.date.max(),
    n_obs=_.date.count(),
)
tab = df_dash >> dash_grouping >> dash_coverage >> ungroup()
# NOTE(review): the disabled export below uses a "google_mob" file name even
# though this table is built from the Waze dashboard data - confirm the
# intended name before re-enabling.
#tab.to_csv('~/private/projects/google_mob_country_list.csv')
tab

Unnamed: 0,country_iso_code,country_name,region_slug,min_date,max_date,n_obs
0,AR,Argentina,country_argentina,2020-03-09,2021-02-16,345
1,BB,Barbados,country_barbados,2020-03-09,2021-02-16,345
2,BR,Brazil,country_brazil,2020-03-09,2021-02-16,345
3,CL,Chile,country_chile,2020-03-09,2021-02-16,345
4,CO,Colombia,country_colombia,2020-03-09,2021-02-16,345
5,CR,Costa Rica,country_costa_rica,2020-03-09,2021-02-16,345
6,DO,Dominican Republic,country_dominican_republic,2020-03-09,2021-02-16,345
7,EC,Ecuador,country_ecuador,2020-03-09,2021-02-16,345
8,GT,Guatemala,country_guatemala,2020-03-09,2021-02-16,345
9,HN,Honduras,country_honduras,2020-03-09,2021-02-16,345


In [100]:
# Compare the number of ISO codes from the index table with the codes that
# actually appear in the country-level dashboard data.
n_iso_codes = len(iso_codes.country_iso_code.to_list())
print(n_iso_codes)
df_dash['country_iso_code'].unique()

20


array(['CL', 'PE', 'BR', 'BB', 'AR', 'MX', 'PY', 'EC', 'GT', 'CO', 'DO',
       'NI', 'SV', 'CR', 'JM', 'TT', 'PA', 'HN', 'UY'], dtype=object)

### Waze driven km

In [149]:
# City-level export of the public Waze COVID-19 dashboard.
# The path starts with "~" (expanded by pandas), like the Google mobility
# load below, instead of hard-coding one user's home directory.
waze_csv = "~/shared/spd-sdv-omitnik-waze/corona/mobility/Waze _ COVID-19 Impact Dashboard_City-Level Data_Table.csv"
df_waze = pd.read_csv(waze_csv)
# NOTE(review): the column is named "miles" while the notebook text talks
# about driven kilometers - confirm the unit; the values shown in the output
# look like fractional changes (e.g. -0.5), not percentages.
df_waze.columns = ['date', 'city', 'country', 'pc_driven_miles']
# Vectorized parse of dates such as "Feb 10, 2021"; this also avoids relying
# on which `datetime` name (module or class) is currently bound in the kernel.
df_waze['date'] = pd.to_datetime(df_waze['date'], format='%b %d, %Y')
df_waze.head()

Unnamed: 0,date,city,country,pc_driven_miles
0,2021-02-10,San Francisco,United States,-0.5
1,2021-02-10,London,United Kingdom,-0.28
2,2021-02-10,Paris,France,-0.27
3,2021-02-10,New York,United States,-0.16
4,2021-02-10,Sao Paulo,Brazil,-0.11


In [152]:
# Cities covered by the Waze city-level export.
df_waze['city'].unique()

array(['San Francisco', 'London', 'Paris', 'New York', 'Sao Paulo'],
      dtype=object)

### Google mobility

In [103]:
# Google Global Mobility Report, restricted to the countries present in the
# Waze index (iso_codes).
region_codes = iso_codes.country_iso_code.to_list()
df_goo = pd.read_csv("~/shared/spd-sdv-omitnik-waze/corona/mobility/Global_Mobility_Report.csv")
df_goo['inregion'] = df_goo.country_region_code.isin(region_codes)
df_goo = df_goo.siu_filter(_.inregion)
# Keep rows with no sub-region and no metro area
# (presumably the country-level aggregates - confirm against the report schema).
df_goo = df_goo.siu_filter(_.sub_region_1.isnull(), _.metro_area.isnull())
print(df_goo.shape)

(7211, 15)


In [104]:
# Date coverage (first day, last day, number of observations) per country
# in the Google mobility data.
goo_grouping = group_by('country_region_code', 'country_region')
goo_coverage = summarize(
    min_date=_.date.min(),
    max_date=_.date.max(),
    n_obs=_.date.count(),
)
tab = df_goo >> goo_grouping >> goo_coverage >> ungroup()
#tab.to_csv('~/private/projects/google_mob_country_list.csv')
tab

Unnamed: 0,country_region_code,country_region,min_date,max_date,n_obs
0,AR,Argentina,2020-02-15,2021-02-09,361
1,BB,Barbados,2020-02-15,2021-02-09,361
2,BO,Bolivia,2020-02-15,2021-02-09,361
3,BR,Brazil,2020-02-15,2021-02-09,361
4,CL,Chile,2020-02-15,2021-01-31,352
5,CO,Colombia,2020-02-15,2021-02-09,361
6,CR,Costa Rica,2020-02-15,2021-02-09,361
7,DO,Dominican Republic,2020-02-15,2021-02-09,361
8,EC,Ecuador,2020-02-15,2021-02-09,361
9,GT,Guatemala,2020-02-15,2021-02-09,361


In [114]:
# Country codes remaining in the filtered Google data, and how many there are.
goo_codes = df_goo.country_region_code.unique()
print(len(goo_codes))
goo_codes

20


array(['AR', 'BB', 'BO', 'BR', 'CL', 'CO', 'CR', 'DO', 'EC', 'GT', 'HN',
       'JM', 'MX', 'NI', 'PA', 'PE', 'PY', 'SV', 'TT', 'UY'], dtype=object)

In [115]:
# List the columns of the filtered Google frame, including the `inregion`
# flag added when the data was loaded.
df_goo.columns

Index(['country_region_code', 'country_region', 'sub_region_1', 'sub_region_2',
       'metro_area', 'iso_3166_2_code', 'census_fips_code', 'date',
       'retail_and_recreation_percent_change_from_baseline',
       'grocery_and_pharmacy_percent_change_from_baseline',
       'parks_percent_change_from_baseline',
       'transit_stations_percent_change_from_baseline',
       'workplaces_percent_change_from_baseline',
       'residential_percent_change_from_baseline', 'inregion'],
      dtype='object')

## Country comparison

### Plots per data source

In [117]:
# One line per country: Google's transit-station mobility, expressed as
# percent change from Google's baseline.
goo_transit = (
    df_goo[['date', 'transit_stations_percent_change_from_baseline', 'country_region']]
    .pivot(index='date', columns='country_region', values='transit_stations_percent_change_from_baseline')
)
goo_transit.iplot(
    #theme='custom',
    # The axis shows Google's percent change from baseline, not the Waze
    # dashboard's TCP index, so it is labelled accordingly.
    yTitle='% change from baseline',
    title='Google mobility - PC Transit stations'
)

In [154]:
# One line per country region: TCP values from the Waze IDB dashboard data.
dash_tcp = df_dash[['date', 'tcp', 'region_slug']].pivot(
    index='date',
    columns='region_slug',
    values='tcp',
)
dash_tcp.iplot(
    #theme='custom',
    yTitle='TCP',
    title='Waze IDB dashboard - TCP',
)

In [None]:
# One line per city: change in driven distance from the public Waze export.
# df_waze has the columns date / city / country / pc_driven_miles, so the
# plot uses `city` and `pc_driven_miles`; the `pc`, `region_slug` and `tcp`
# names used previously do not exist in this frame and raised a KeyError.
# NOTE(review): pivot requires one row per (date, city) pair - confirm the
# export has no duplicates.
waze_city_change = (
    df_waze[['date', 'pc_driven_miles', 'city']]
    .pivot(index='date', columns='city', values='pc_driven_miles')
)
waze_city_change.iplot(
    #theme='custom',
    yTitle='Change from baseline',
    title='Waze driven km - change from baseline by city'
)