In [None]:
import os
import sys

## Add the 'lib' directory (located in the parent of the current working directory) to the import path
convida_lib_path = os.path.dirname(os.getcwd())
lib = os.path.join(convida_lib_path,'lib')
# Guard against duplicates: re-running this cell must not keep growing sys.path
if lib not in sys.path:
    sys.path.append(lib)

In [2]:
from convida import COnVIDa
from regions import Regions
from datatype import DataType
import pandas as pd
import h5py
import os.path
import time

## Generation of cache

### Temporal

In [3]:
# Announce the run; end='\n\n' emits the same trailing blank line as a bare print()
print('GENERATING CACHÉ...', end='\n\n')

# Regions returned by the library for the 'ES' code
all_regions = Regions.get_regions('ES')
print('REGIONS: ', ', '.join(all_regions), end='\n\n')

# get_data_items_names returns a mapping whose values are collections of item names;
# flatten them into one list, keeping the mapping's iteration order
datasources = COnVIDa.get_data_items_names(DataType.TEMPORAL, language='internal')
all_data_items = [item for group in datasources.values() for item in group]
print('DATA ITEMS: ', ', '.join(all_data_items), end='\n\n')

UPDATING CACHÉ...

REGIONS:  Andalucía, Aragón, Asturias, Baleares, Canarias, Cantabria, Castilla La Mancha, Castilla y León, Cataluña, Ceuta, C. Valenciana, Extremadura, Galicia, Madrid, Melilla, Murcia, Navarra, País Vasco, La Rioja

DATA ITEMS:  prec, presMax, presMin, racha, sol, tmax, tmed, tmin, velmedia, altitud, dir, altas, casos, confirmados_pcr, confirmados_test, fallecidos, hospitalizados, uci, grocery_and_pharmacy_percent_change_from_baseline, parks_percent_change_from_baseline, residential_percent_change_from_baseline, retail_and_recreation_percent_change_from_baseline, transit_stations_percent_change_from_baseline, workplaces_percent_change_from_baseline, driving, defunciones_observadas, defunciones_observadas_lim_inf, defunciones_observadas_lim_sup, defunciones_esperadas, defunciones_esperadas_q01, defunciones_esperadas_q99



In [4]:
%%time
start = pd.to_datetime('2016-01-01', format='%Y-%m-%d')
end = pd.to_datetime('today', format='%Y-%m-%d')

temporal_data = COnVIDa.get_data_items(regions=all_regions,
                          data_items=all_data_items,
                          start_date=start,
                          end_date=end,
                          language='internal',
                          errors='raise')

temporal_data.to_hdf(path_or_buf=f'data/{str(end)[0:10]}.h5',key='temporal',mode='a')

Assumed a TEMPORAL data retrieval...
Wall time: 4min 23s


In [5]:
%%time

## Read info of generated cache
temporal_data = pd.read_hdf(path_or_buf=f'data/{str(end)[0:10]}.h5',
                            key='temporal',
                            mode='r')
temporal_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1617 entries, 2016-01-01 to 2020-06-04
Freq: D
Columns: 583 entries, ('Andalucía', 'altas') to ('País Vasco', 'workplaces_percent_change_from_baseline')
dtypes: float64(583)
memory usage: 7.2 MB
Wall time: 70.8 ms


### Geographical

In [6]:
# Announce the run; end='\n\n' emits the same trailing blank line as a bare print()
print('UPDATING CACHÉ...', end='\n\n')

# Regions returned by the library for the 'ES' code
all_regions = Regions.get_regions('ES')
print('REGIONS: ', ', '.join(all_regions), end='\n\n')

# Collect the geographical item names of every entry in the returned mapping into one flat list
datasources = COnVIDa.get_data_items_names(DataType.GEOGRAPHICAL, language='internal')
all_data_items = []
for item_names in datasources.values():
    all_data_items.extend(item_names)
print('DATA ITEMS: ', ', '.join(all_data_items))

UPDATING CACHÉ...

REGIONS:  Andalucía, Aragón, Asturias, Baleares, Canarias, Cantabria, Castilla La Mancha, Castilla y León, Cataluña, Ceuta, C. Valenciana, Extremadura, Galicia, Madrid, Melilla, Murcia, Navarra, País Vasco, La Rioja

DATA ITEMS:  actividad_fisica, imc, tabaco, hogares_tipo_familia, hogares_densidad_ocupacion, mayores_65_solos


In [7]:
%%time
geographical_data = COnVIDa.get_data_items(regions=all_regions,
                              data_items=all_data_items,
                              language='internal')

geographical_data.to_hdf(path_or_buf=f'data/{str(end)[0:10]}.h5',key='geographical',mode='a')

Assumed a GEOGRAPHICAL data retrieval...
Wall time: 3.75 s


In [8]:
%%time

## Read info of generated cache

geographical_data = pd.read_hdf(path_or_buf=f'data/{str(end)[0:10]}.h5',
                            key='geographical',
                            mode='r')
geographical_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, Andalucía to País Vasco
Data columns (total 35 columns):
 #   Column                                                               Non-Null Count  Dtype  
---  ------                                                               --------------  -----  
 0   actividad_fisica (Nivel alto)                                        19 non-null     float64
 1   actividad_fisica (Nivel bajo)                                        19 non-null     float64
 2   actividad_fisica (Nivel moderado)                                    19 non-null     float64
 3   actividad_fisica (No consta)                                         19 non-null     float64
 4   hogares_densidad_ocupacion (60 m2 o más por ocupante)                19 non-null     float64
 5   hogares_densidad_ocupacion (Entre 10 y menos de 20 m2 por ocupante)  19 non-null     float64
 6   hogares_densidad_ocupacion (Entre 20 y menos de 30 m2 por ocupante)  19 non-null     float64
 7  