In [1]:
%run "../../../common/0_notebooks_base_setup.py"

! pip install uk-covid19

/Users/csuarezgurruchaga/Desktop/Digital-House/CLASE_51/dsad_2021/common
default checking
Running command `conda list`... ok
jupyterlab=2.2.6 already installed
pandas=1.3.0 already installed
bokeh=2.2.3 already installed
seaborn=0.11.0 already installed
matplotlib=3.3.2 already installed
ipywidgets=7.5.1 already installed
pytest=6.2.1 already installed
chardet=4.0.0 already installed
psutil=5.7.2 already installed
scipy=1.5.2 already installed
statsmodels=0.12.1 already installed
scikit-learn=0.23.2 already installed
xlrd=2.0.1 already installed
nltk=3.5 already installed
unidecode=1.1.1 already installed
pydotplus=2.0.2 already installed
pandas-datareader=0.10.0 already installed
flask=1.1.2 already installed


<img src='../../../common/logo_DH.png' align='left' width=35%/>

# Checkpoint APIs

---

En esta práctica vamos a usar una API que pone a disposición datos de COVID-19 del Reino Unido.

La documentación de la API está disponible en https://coronavirus.data.gov.uk/details/developers-guide

Todos los pedidos (requests) a la API son sobre HTTPS.

También proveen una biblioteca para acceso a los datos https://github.com/publichealthengland/coronavirus-dashboard-api-python-sdk

En la primera parte de esta práctica vamos a usar un request para consultar cierta información (<a href="https://coronavirus.data.gov.uk/details/developers-guide#sdks">documentación</a>), y en la segunda parte vamos a consultar la misma información usando la biblioteca que provee <a href="https://github.com/publichealthengland/coronavirus-dashboard-api-python-sdk">Public Health England</a>.


## Imports


In [2]:
import pandas as pd
import numpy as np
from requests import get
from json import dumps
from datetime import date, timedelta

from uk_covid19 import Cov19API

## Ejercicio 1

Usando un web request construir un DataFrame con información sobre casos nuevos y muertes en Inglaterra ("england") el día de ayer:

Los valores de metricName que queremos obtener en la respuesta son:

* date,
* areaName,
* areaCode,
* newCasesByPublishDate,
* cumCasesByPublishDate,
* newDeaths28DaysByPublishDate,
* cumDeaths28DaysByPublishDate

Para ver qué filtros podemos aplicar en la consulta:

https://coronavirus.data.gov.uk/details/developers-guide#params-filters

Según la documentación, la estructura de la respuesta queda definida por 

`structure={[responseName]:[metricName], [responseName]:[metricName]}`

Para ver los valores disponibles para metricName:

https://coronavirus.data.gov.uk/details/developers-guide `See a list of valid metrics for structure`

**Vemos en la documentación que el filtro areaType es requerido para todas las consultas:**

`The areaType metric is mandatory and must be defined in all queries.`





In [3]:
# Reference dates for the exercises: today and the day before it.
hoy = date.today()
ayer = hoy - timedelta(days=1)

# Show the date object and its str() form, one per line.
print(ayer, str(ayer), sep="\n")


2022-02-01
2022-02-01


In [4]:
ENDPOINT = "https://api.coronavirus.data.gov.uk/v1/data"

# Filter values: a single nation (England) on a single day (yesterday).
AREA_TYPE = "nation"
AREA_NAME = "england"
DATE = str(ayer)

# Each filter is a `metric=value` pair; they are joined with ";" further down.
filters = [
    f"areaType={ AREA_TYPE }",
    f"areaName={ AREA_NAME }",
    f"date={ DATE }",
]

# Response structure: maps the key we want in each record to the API metric name.
structure = dict(
    date="date",
    name="areaName",
    code="areaCode",
    dailyCases="newCasesByPublishDate",
    cumulativeCases="cumCasesByPublishDate",
    dailyDeaths="newDeaths28DaysByPublishDate",
    cumulativeDeaths="cumDeaths28DaysByPublishDate",
)

# Query-string parameters; the structure is sent as compact JSON (no spaces).
api_params = {
    "filters": ";".join(filters),
    "structure": dumps(structure, separators=(",", ":")),
    "format": "json",
}

response = get(ENDPOINT, params=api_params, timeout=10)

# Stop here on any status code of 400 or above, surfacing the server's message.
if 400 <= response.status_code:
    raise RuntimeError(f'Request failed: { response.text }')

# Show the final (URL-encoded) request URL, then the decoded JSON body.
print(response.url, "---", sep="\n")
print(response.json())


https://api.coronavirus.data.gov.uk/v1/data?filters=areaType%3Dnation%3BareaName%3Dengland%3Bdate%3D2022-02-01&structure=%7B%22date%22%3A%22date%22%2C%22name%22%3A%22areaName%22%2C%22code%22%3A%22areaCode%22%2C%22dailyCases%22%3A%22newCasesByPublishDate%22%2C%22cumulativeCases%22%3A%22cumCasesByPublishDate%22%2C%22dailyDeaths%22%3A%22newDeaths28DaysByPublishDate%22%2C%22cumulativeDeaths%22%3A%22cumDeaths28DaysByPublishDate%22%7D&format=json
---
{'length': 1, 'maxPageLimit': 2500, 'totalRecords': 4, 'data': [{'date': '2022-02-01', 'name': 'England', 'code': 'E92000001', 'dailyCases': 103353, 'cumulativeCases': 14948735, 'dailyDeaths': 185, 'cumulativeDeaths': 136596}], 'requestPayload': {'structure': {'date': 'date', 'name': 'areaName', 'code': 'areaCode', 'dailyCases': 'newCasesByPublishDate', 'cumulativeCases': 'cumCasesByPublishDate', 'dailyDeaths': 'newDeaths28DaysByPublishDate', 'cumulativeDeaths': 'cumDeaths28DaysByPublishDate'}, 'filters': [{'identifier': 'areaType', 'operator': 

Veamos qué datos vienen en el response

In [5]:
# Decode the JSON body into a dict and list its top-level keys.
response_dict = response.json()
response_dict.keys()

dict_keys(['length', 'maxPageLimit', 'totalRecords', 'data', 'requestPayload', 'pagination'])

In [6]:
# number of records in the response

response_dict['length']

1

In [7]:
# page-size limit reported by the API (presumably max records per page — confirm in the API docs)
response_dict['maxPageLimit']

2500

In [8]:
# the records themselves: a list of dicts keyed by the names chosen in `structure`
response_dict['data']

[{'date': '2022-02-01',
  'name': 'England',
  'code': 'E92000001',
  'dailyCases': 103353,
  'cumulativeCases': 14948735,
  'dailyDeaths': 185,
  'cumulativeDeaths': 136596}]

In [9]:
# what the server received in the request:

response_dict['requestPayload']

{'structure': {'date': 'date',
  'name': 'areaName',
  'code': 'areaCode',
  'dailyCases': 'newCasesByPublishDate',
  'cumulativeCases': 'cumCasesByPublishDate',
  'dailyDeaths': 'newDeaths28DaysByPublishDate',
  'cumulativeDeaths': 'cumDeaths28DaysByPublishDate'},
 'filters': [{'identifier': 'areaType', 'operator': '=', 'value': 'nation'},
  {'identifier': 'areaName', 'operator': '=', 'value': 'england'},
  {'identifier': 'date', 'operator': '=', 'value': '2022-02-01'}],
 'page': 1}

In [10]:
# pagination links for this query: current, next, previous, first and last page
response_dict['pagination']

{'current': '/v1/data?filters=areaType=nation;areaName=england;date=2022-02-01&structure={"date":"date","name":"areaName","code":"areaCode","dailyCases":"newCasesByPublishDate","cumulativeCases":"cumCasesByPublishDate","dailyDeaths":"newDeaths28DaysByPublishDate","cumulativeDeaths":"cumDeaths28DaysByPublishDate"}&format=json&page=1',
 'next': None,
 'previous': None,
 'first': '/v1/data?filters=areaType=nation;areaName=england;date=2022-02-01&structure={"date":"date","name":"areaName","code":"areaCode","dailyCases":"newCasesByPublishDate","cumulativeCases":"cumCasesByPublishDate","dailyDeaths":"newDeaths28DaysByPublishDate","cumulativeDeaths":"cumDeaths28DaysByPublishDate"}&format=json&page=1',
 'last': '/v1/data?filters=areaType=nation;areaName=england;date=2022-02-01&structure={"date":"date","name":"areaName","code":"areaCode","dailyCases":"newCasesByPublishDate","cumulativeCases":"cumCasesByPublishDate","dailyDeaths":"newDeaths28DaysByPublishDate","cumulativeDeaths":"cumDeaths28Days

In [11]:
# Build a DataFrame from the list of records under the "data" key and display it.
response_df = pd.DataFrame(response.json()["data"])
response_df

Unnamed: 0,date,name,code,dailyCases,cumulativeCases,dailyDeaths,cumulativeDeaths
0,2022-02-01,England,E92000001,103353,14948735,185,136596


In [12]:
# the raw list of records that the DataFrame was built from
response.json()["data"]

[{'date': '2022-02-01',
  'name': 'England',
  'code': 'E92000001',
  'dailyCases': 103353,
  'cumulativeCases': 14948735,
  'dailyDeaths': 185,
  'cumulativeDeaths': 136596}]

## Ejercicio 2

Obtener todos los datos que obtuvimos en el ejercicio anterior para los últimos 30 días.


Quitamos el filtro `date` de la consulta y vemos que la primera página trae más de treinta días de datos, ordenados por fecha en forma decreciente.

In [13]:
ENDPOINT = "https://api.coronavirus.data.gov.uk/v1/data"

# Filter values: only the area is fixed; no date filter is sent.
AREA_TYPE = "nation"
AREA_NAME = "england"

# Each filter is a `metric=value` pair; they are joined with ";" further down.
filters = [
    f"areaType={ AREA_TYPE }",
    f"areaName={ AREA_NAME }",
]

# Response structure: maps the key we want in each record to the API metric name.
structure = dict(
    date="date",
    name="areaName",
    code="areaCode",
    dailyCases="newCasesByPublishDate",
    cumulativeCases="cumCasesByPublishDate",
    dailyDeaths="newDeaths28DaysByPublishDate",
    cumulativeDeaths="cumDeaths28DaysByPublishDate",
)

# Query-string parameters; the structure is sent as compact JSON and page 1 is requested.
api_params = {
    "filters": ";".join(filters),
    "structure": dumps(structure, separators=(",", ":")),
    "format": "json",
    "page": "1",
}

response = get(ENDPOINT, params=api_params, timeout=10)

# Stop here on any status code of 400 or above, surfacing the server's message.
if 400 <= response.status_code:
    raise RuntimeError(f'Request failed: { response.text }')

# Only the final request URL is shown; the JSON body is inspected in the next cells.
print(response.url, "---", sep="\n")

https://api.coronavirus.data.gov.uk/v1/data?filters=areaType%3Dnation%3BareaName%3Dengland&structure=%7B%22date%22%3A%22date%22%2C%22name%22%3A%22areaName%22%2C%22code%22%3A%22areaCode%22%2C%22dailyCases%22%3A%22newCasesByPublishDate%22%2C%22cumulativeCases%22%3A%22cumCasesByPublishDate%22%2C%22dailyDeaths%22%3A%22newDeaths28DaysByPublishDate%22%2C%22cumulativeDeaths%22%3A%22cumDeaths28DaysByPublishDate%22%7D&format=json&page=1
---


In [14]:
# Build a DataFrame from the list of records under the "data" key and display it.
response_df = pd.DataFrame(response.json()["data"])
response_df

Unnamed: 0,date,name,code,dailyCases,cumulativeCases,dailyDeaths,cumulativeDeaths
0,2022-02-02,England,E92000001,81446,15028951,519.0,137115.0
1,2022-02-01,England,E92000001,103353,14948735,185.0,136596.0
2,2022-01-31,England,E92000001,81720,14845382,37.0,135509.0
3,2022-01-30,England,E92000001,59559,14023177,75.0,135472.0
4,2022-01-29,England,E92000001,69137,13963618,275.0,135397.0
...,...,...,...,...,...,...,...
729,2020-02-04,England,E92000001,0,2,,
730,2020-02-03,England,E92000001,0,2,,
731,2020-02-02,England,E92000001,0,2,,
732,2020-02-01,England,E92000001,0,2,,


Filtramos por las fechas de interés

In [15]:
# Window of interest: from 30 days ago through today. Both bounds are inclusive,
# so the window spans up to 31 calendar dates.
max_date = date.today()
min_date = max_date - timedelta(days=30)

# Parse the date column once, then flag the rows that fall inside the window.
parsed_dates = pd.to_datetime(response_df.date)
mask = (parsed_dates <= np.datetime64(max_date)) & (parsed_dates >= np.datetime64(min_date))

response_df_last_month = response_df.loc[mask, :]

# Display the selection sorted by date, oldest first.
response_df_last_month.sort_values("date")

Unnamed: 0,date,name,code,dailyCases,cumulativeCases,dailyDeaths,cumulativeDeaths
30,2022-01-03,England,E92000001,137541,11408560,42.0,129474.0
29,2022-01-04,England,E92000001,148725,11557285,25.0,129499.0
28,2022-01-05,England,E92000001,150232,11706635,316.0,129815.0
27,2022-01-06,England,E92000001,152306,11858941,203.0,130018.0
26,2022-01-07,England,E92000001,149405,12008346,193.0,130211.0
25,2022-01-08,England,E92000001,130330,12132895,282.0,130493.0
24,2022-01-09,England,E92000001,121228,12254123,84.0,130577.0
23,2022-01-10,England,E92000001,115998,12370121,55.0,130632.0
22,2022-01-11,England,E92000001,104833,12469614,358.0,130990.0
21,2022-01-12,England,E92000001,116173,12585787,361.0,131351.0


In [16]:
# Decode the JSON body into a dict and list its top-level keys.
response_dict = response.json()
response_dict.keys()

dict_keys(['length', 'maxPageLimit', 'totalRecords', 'data', 'requestPayload', 'pagination'])

In [17]:
# sanity check: the DataFrame has as many rows as the response reports under 'length'
response_df.shape[0] == response_dict['length']

True

In [18]:
# page-size limit reported by the API (presumably max records per page — confirm in the API docs)
response_dict['maxPageLimit']

2500

In [19]:
# what the server received in the request (structure, filters and page)
response_dict['requestPayload']

{'structure': {'date': 'date',
  'name': 'areaName',
  'code': 'areaCode',
  'dailyCases': 'newCasesByPublishDate',
  'cumulativeCases': 'cumCasesByPublishDate',
  'dailyDeaths': 'newDeaths28DaysByPublishDate',
  'cumulativeDeaths': 'cumDeaths28DaysByPublishDate'},
 'filters': [{'identifier': 'areaType', 'operator': '=', 'value': 'nation'},
  {'identifier': 'areaName', 'operator': '=', 'value': 'england'}],
 'page': 1}

El response trajo el resultado de la consulta en una única página: lo sabemos porque `response_dict['pagination']["next"]` es nulo.

In [20]:
# pagination links for this query: current, next, previous, first and last page
response_dict['pagination']

{'current': '/v1/data?filters=areaType=nation;areaName=england&structure={"date":"date","name":"areaName","code":"areaCode","dailyCases":"newCasesByPublishDate","cumulativeCases":"cumCasesByPublishDate","dailyDeaths":"newDeaths28DaysByPublishDate","cumulativeDeaths":"cumDeaths28DaysByPublishDate"}&format=json&page=1',
 'next': None,
 'previous': None,
 'first': '/v1/data?filters=areaType=nation;areaName=england&structure={"date":"date","name":"areaName","code":"areaCode","dailyCases":"newCasesByPublishDate","cumulativeCases":"cumCasesByPublishDate","dailyDeaths":"newDeaths28DaysByPublishDate","cumulativeDeaths":"cumDeaths28DaysByPublishDate"}&format=json&page=1',
 'last': '/v1/data?filters=areaType=nation;areaName=england&structure={"date":"date","name":"areaName","code":"areaCode","dailyCases":"newCasesByPublishDate","cumulativeCases":"cumCasesByPublishDate","dailyDeaths":"newDeaths28DaysByPublishDate","cumulativeDeaths":"cumDeaths28DaysByPublishDate"}&format=json&page=1'}

In [21]:
# True when the response carries no link to a following page
response_dict['pagination']["next"] is None

True

Alternativa: 

Hacer un ciclo iterando sobre cada fecha de interés. Usar sleep entre consultas sucesivas https://www.programiz.com/python-programming/time/sleep
    

## Ejercicio 3

Repetir el ejercicio 1 usando la biblioteca que provee Public Health England.

Documentación: https://publichealthengland.github.io/coronavirus-dashboard-api-python-sdk/pages/examples/general_use.html#

In [22]:
# Filter values: a single nation (England) on a single day (yesterday).
AREA_TYPE = "nation"
AREA_NAME = "england"
DATE = str(ayer)

# The SDK receives the filters as a list of `metric=value` strings.
ej3_filters = [
    f"areaType={ AREA_TYPE }",
    f"areaName={ AREA_NAME }",
    f"date={ DATE }",
]

# Response structure: maps the key we want in each record to the API metric name.
ej3_structure = dict(
    date="date",
    name="areaName",
    code="areaCode",
    dailyCases="newCasesByPublishDate",
    cumulativeCases="cumCasesByPublishDate",
    dailyDeaths="newDeaths28DaysByPublishDate",
    cumulativeDeaths="cumDeaths28DaysByPublishDate",
)

# Build the SDK client for this query and ask for the result as a DataFrame.
api = Cov19API(filters=ej3_filters, structure=ej3_structure)
data_df = api.get_dataframe()

data_df

Unnamed: 0,date,name,code,dailyCases,cumulativeCases,dailyDeaths,cumulativeDeaths
0,2022-02-01,England,E92000001,103353,14948735,185,136596


In [23]:
# Same query through the SDK, this time requesting the JSON form of the result.
data_json = api.get_json()
data_json

{'data': [{'date': '2022-02-01',
   'name': 'England',
   'code': 'E92000001',
   'dailyCases': 103353,
   'cumulativeCases': 14948735,
   'dailyDeaths': 185,
   'cumulativeDeaths': 136596}],
 'lastUpdate': '2022-02-02T16:00:14.000000Z',
 'length': 1,
 'totalPages': 1}

In [24]:
# Same query through the SDK, this time requesting the CSV form of the result.
data_csv = api.get_csv()
data_csv

'date,name,code,dailyCases,cumulativeCases,dailyDeaths,cumulativeDeaths\n2022-02-01,England,E92000001,103353,14948735,185,136596\n'

## Ejercicio 4

Repetir el ejercicio 2 usando la biblioteca que provee Public Health England.


In [25]:
# Filter values: only the area is fixed; no date filter is sent.
AREA_TYPE = "nation"
AREA_NAME = "england"

# The SDK receives the filters as a list of `metric=value` strings.
ej4_filters = [
    f"areaType={ AREA_TYPE }",
    f"areaName={ AREA_NAME }",
]

# Response structure: maps the key we want in each record to the API metric name.
ej4_structure = dict(
    date="date",
    name="areaName",
    code="areaCode",
    dailyCases="newCasesByPublishDate",
    cumulativeCases="cumCasesByPublishDate",
    dailyDeaths="newDeaths28DaysByPublishDate",
    cumulativeDeaths="cumDeaths28DaysByPublishDate",
)

# Build the SDK client for this query and ask for the result as a DataFrame.
api = Cov19API(filters=ej4_filters, structure=ej4_structure)
data_df = api.get_dataframe()

data_df

Unnamed: 0,date,name,code,dailyCases,cumulativeCases,dailyDeaths,cumulativeDeaths
0,2022-02-02,England,E92000001,81446,15028951,519.0,137115.0
1,2022-02-01,England,E92000001,103353,14948735,185.0,136596.0
2,2022-01-31,England,E92000001,81720,14845382,37.0,135509.0
3,2022-01-30,England,E92000001,59559,14023177,75.0,135472.0
4,2022-01-29,England,E92000001,69137,13963618,275.0,135397.0
...,...,...,...,...,...,...,...
729,2020-02-04,England,E92000001,0,2,,
730,2020-02-03,England,E92000001,0,2,,
731,2020-02-02,England,E92000001,0,2,,
732,2020-02-01,England,E92000001,0,2,,


Filtramos por las fechas de interés

In [26]:
# Window of interest: from 30 days ago through today. Both bounds are inclusive,
# so the window spans up to 31 calendar dates.
max_date = date.today()
min_date = max_date - timedelta(days=30)

# Parse the date column once, then flag the rows that fall inside the window.
parsed_dates = pd.to_datetime(data_df.date)
mask = (parsed_dates <= np.datetime64(max_date)) & (parsed_dates >= np.datetime64(min_date))

data_df_last_month = data_df.loc[mask, :]

# Display the selection sorted by date, oldest first.
data_df_last_month.sort_values("date")

Unnamed: 0,date,name,code,dailyCases,cumulativeCases,dailyDeaths,cumulativeDeaths
30,2022-01-03,England,E92000001,137541,11408560,42.0,129474.0
29,2022-01-04,England,E92000001,148725,11557285,25.0,129499.0
28,2022-01-05,England,E92000001,150232,11706635,316.0,129815.0
27,2022-01-06,England,E92000001,152306,11858941,203.0,130018.0
26,2022-01-07,England,E92000001,149405,12008346,193.0,130211.0
25,2022-01-08,England,E92000001,130330,12132895,282.0,130493.0
24,2022-01-09,England,E92000001,121228,12254123,84.0,130577.0
23,2022-01-10,England,E92000001,115998,12370121,55.0,130632.0
22,2022-01-11,England,E92000001,104833,12469614,358.0,130990.0
21,2022-01-12,England,E92000001,116173,12585787,361.0,131351.0


## Referencias y Material Adicional
---

https://coronavirus.data.gov.uk/details/developers-guide

https://github.com/publichealthengland/coronavirus-dashboard-api-python-sdk

https://apidocs.data.world/toolkit/api/clients

https://apidocs.data.world/toolkit/rest-api

https://github.com/datadotworld/data.world-py

https://datosgobar.github.io/series-tiempo-ar-api/

https://datosgobar.github.io/series-tiempo-ar-api/python-usage/