# Data retrieval from HDX

## Imports

In [None]:
from hdx.hdx_configuration import Configuration
from hdx.data.dataset import Dataset
from tqdm import tqdm

### HDX API configuration

In [None]:
# Create the HDX configuration: production site, read-only access, and a
# user agent string identifying this project.
Configuration.create(hdx_site='prod', user_agent='population_of_concern', hdx_read_only=True)

Get a list of all datasets available in HDX:

In [None]:
# Names of all datasets reported by HDX; the cells below filter this list
# by substring.
list_datasets = Dataset.get_all_dataset_names()

There are 12732 datasets

In [None]:
# Number of dataset names retrieved.
len(list_datasets)

Create a function that downloads into a local file path the first data file (resource) of each dataset in a given list of HDX dataset names:

In [None]:
def download_data_from_HDX(output_path, dataset_list):
    """Download the first resource of each HDX dataset into ``output_path``.

    Each file is stored as ``<dataset name>.<resource format, lower-cased>``.
    A dataset whose target file already exists is not downloaded again.

    Args:
        output_path: Directory that receives the files. It is created when
            missing; a trailing path separator is optional.
        dataset_list: Iterable of HDX dataset names.

    Returns:
        None. Progress is shown with tqdm and one message is printed per
        dataset.
    """
    import os

    # Make sure the destination exists before the first download/rename.
    # An empty path means "current directory" and needs no creation.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    for element in tqdm(dataset_list):

        dst = Dataset.read_from_hdx(element)
        # read_from_hdx is documented to return None for an unknown dataset.
        rsc = dst.get_resources() if dst is not None else []

        # A dataset without resources has nothing to download; skip it
        # instead of failing on rsc[0] and aborting the whole batch.
        if not rsc:
            print("No resources found for {0}, skipping".format(element))
            continue

        first_resource = rsc[0]
        file_name = element + '.' + first_resource["format"].lower()
        # os.path.join keeps the result correct with or without a trailing
        # separator on output_path.
        file_path_name = os.path.join(output_path, file_name)

        if os.path.exists(file_path_name):
            print("Data from {0} is already in {1}".format(element, output_path))
            continue

        # download() returns (url, local path); the file is then renamed to
        # the predictable <dataset>.<format> name used by the check above.
        url_downloaded, path_to_downloaded_file = first_resource.download(output_path)
        os.rename(path_to_downloaded_file, file_path_name)
        print("Data from {0} downloaded into {1}".format(element, output_path))

In [None]:
# Switch for the download cells: they call download_data_from_HDX only when
# this is True.
WRITE_FLAG = False

#### People of concern Residing in a certain country

Define a list with only those datasets that include in their name the string 'refugees-residing':

In [None]:
# Keep only the dataset names that contain 'refugees-residing'.
residing = [name for name in list_datasets if 'refugees-residing' in name]

In [None]:
# Number of 'refugees-residing' datasets found.
len(residing)

In [None]:
# Preview the first five matching dataset names.
residing[0:5]

In [None]:
# Download the 'residing' datasets only when WRITE_FLAG is enabled.
if WRITE_FLAG:
    download_data_from_HDX("../data/raw/residing/", residing)

#### People of concern Originating from a certain country

Define a list with only those datasets that include in their name the string 'refugees-originating':

In [None]:
# Keep only the dataset names that contain 'refugees-originating'.
originating = [name for name in list_datasets if 'refugees-originating' in name]

In [None]:
# Number of 'refugees-originating' datasets found.
len(originating)

In [None]:
# Preview the first five matching dataset names.
originating[0:5]

In [None]:
# Download the 'originating' datasets only when WRITE_FLAG is enabled.
if WRITE_FLAG:
    download_data_from_HDX("../data/raw/originating/", originating)

#### World Bank social and economic indicators for all countries

Define a list with only those datasets that include in their name the string 'world-bank-indicators-for':

In [None]:
# World Bank indicator datasets, leaving out any name that contains 'showcase'.
indicators = [
    name
    for name in list_datasets
    if 'world-bank-indicators-for' in name and 'showcase' not in name
]

In [None]:
# Number of World Bank indicator datasets kept after filtering.
len(indicators)

In [None]:
# Preview the first five matching dataset names.
indicators[0:5]

In [None]:
# Download the indicator datasets only when WRITE_FLAG is enabled.
if WRITE_FLAG:
    download_data_from_HDX("../data/raw/indicators/", indicators)