### Imports

In [10]:
import os
import time
import ee
import requests
import shutil, sys
import logging
import multiprocessing
from retry import retry

### Earth Engine Authentication

In [11]:
# Interactive Earth Engine login, left disabled; presumably only needed when no
# credentials are stored for this environment yet -- uncomment in that case.
#ee.Authenticate()

In [12]:
# Start the Earth Engine session against the high-volume endpoint (see the URL below);
# the download cell further down issues many parallel tile requests through this session.
ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')

### Data Loading

#### Country selection

In [13]:
# ---> ENTER COUNTRY BELOW <---
COUNTRY = "Democratic Republic of the Congo"

# Country outline: the FAO GAUL level-0 feature(s) whose ADM0_NAME equals COUNTRY.
countries = ee.FeatureCollection("FAO/GAUL/2015/level0")
name_matches_country = ee.Filter.eq('ADM0_NAME', COUNTRY)
border = countries.filter(name_matches_country)

# Constant-valued image restricted to the outline; its band ('constant') serves
# as the class band for the sampling call below.
img = ee.Image(1).clip(border)

# Sample point geometries inside the outline at a 10 000 m scale.  numPoints is
# set far above what this scale yields (the recorded run returned 23533 points),
# so it does not act as the limiting factor here.
sampling_options = {
    'numPoints': 1000000,
    'classBand': 'constant',
    'region': border,
    'scale': 10000,
    'geometries': True,
}
pointsROI = img.stratifiedSample(**sampling_options)

print(f'COUNTRY: {COUNTRY}, POINTS: {pointsROI.size().getInfo()}')

COUNTRY: Democratic Republic of the Congo, POINTS: 23533


#### Datasets

In [14]:
# NOTE: everything in this cell is one triple-quoted string literal, so none of the
# assignments inside it are executed; the cell only echoes the string as its output.
# The ru_ur_* images that are actually used are defined in the following cell.
'''# DHS Rural&Urban Maps
ru_ur_1 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_1')
ru_ur_2 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_2')
ru_ur_3 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_3')
ru_ur_4 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_4')
ru_ur_5 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_5')
ru_ur_6 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_6')
ru_ur_7 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_7')
ru_ur_8 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_8')
ru_ur_9 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_9')
ru_ur_10 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_10')

ru_ur_dhs = ee.ImageCollection.fromImages([ru_ur_1,ru_ur_2,ru_ur_3,ru_ur_4,ru_ur_5,
                                                  ru_ur_6,ru_ur_7,ru_ur_8,ru_ur_9,ru_ur_10]).mosaic()'''

"# DHS Rural&Urban Maps\nru_ur_1 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_1')\nru_ur_2 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_2')\nru_ur_3 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_3')\nru_ur_4 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_4')\nru_ur_5 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_5')\nru_ur_6 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_6')\nru_ur_7 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_7')\nru_ur_8 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_8')\nru_ur_9 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_9')\nru_ur_10 = ee.Image('projects/ee-albinso/assets/DHS_RuUr_10')\n\nru_ur_dhs = ee.ImageCollection.fromImages([ru_ur_1,ru_ur_2,ru_ur_3,ru_ur_4,ru_ur_5,\n                                                  ru_ur_6,ru_ur_7,ru_ur_8,ru_ur_9,ru_ur_10]).mosaic()"

In [15]:
# DHS Rural&Urban Maps
# The raster is stored as ten assets whose ids differ only in the trailing
# index 0..9, so the list is generated instead of being spelled out by hand.
ru_ur_tiles = [
    ee.Image(f'users/kakooeimohammd/Africa/RuUr_Africa_Raster5_{asset_idx}')
    for asset_idx in range(10)
]

# Keep the individual names available (ru_ur_1 is asset index 0, ..., ru_ur_10 is index 9).
(ru_ur_1, ru_ur_2, ru_ur_3, ru_ur_4, ru_ur_5,
 ru_ur_6, ru_ur_7, ru_ur_8, ru_ur_9, ru_ur_10) = ru_ur_tiles

# Combine the ten assets into the single image that the tile download reads from.
ru_ur_dhs = ee.ImageCollection.fromImages(ru_ur_tiles).mosaic()

#### Data retrieval

In [16]:
# Destination folder for the downloaded tiles (one folder per country).
OUT_DIR = f'/mimer/NOBACKUP/groups/globalpoverty1/albin_and_albin/raw_data_newest_rural_urban_dhs/{COUNTRY}'

# On the first run for this country the folder is missing: create it (including
# any missing parents, requested mode 0o775) and report the path from the
# project-group folder downwards.
out_dir_missing = not os.path.isdir(OUT_DIR)
if out_dir_missing:
    os.makedirs(OUT_DIR, mode=0o775)
    print(f'Directory created (mimer): ..{OUT_DIR.partition("globalpoverty1")[-1]}')

Directory created (mimer): ../albin_and_albin/raw_data_newest_rural_urban_dhs/Democratic Republic of the Congo


In [17]:
@retry(tries=10, delay=1, backoff=2)
def get_image_tile(point):
    '''Download the rural/urban tile centred on ``point`` and write it to OUT_DIR.

    Parameters
    ----------
    point : dict
        Feature dictionary providing ``point['id']`` and
        ``point['geometry']['coordinates']`` (the items obtained from
        ``pointsROI.toList(...).getInfo()``).

    Returns
    -------
    bytes
        Raw body of the downloaded GeoTIFF; the same bytes are saved as
        ``OUT_DIR/tile_<id>.tif``.

    Raises
    ------
    requests.exceptions.HTTPError
        When the download answers with any status other than 200.
    requests.exceptions.RequestException
        On connection problems or timeouts.

    Any exception triggers the ``@retry`` decorator: up to 10 attempts, starting
    with a 1 s pause that doubles after every failed attempt.  Only the error of
    the last attempt reaches the caller.
    '''

    # get ID and point-coordinates (a separate local name keeps the incoming
    # feature dict and the Earth Engine geometry apart)
    tile_id = point['id']
    centre = ee.Geometry.Point(point['geometry']['coordinates'])

    # set up rectangular bound around point
    roi = centre.buffer(500 * 10).bounds()  # tile dim: 1000*1000px (1px=10m)

    # image to retrieve
    tile_image = ru_ur_dhs.clip(roi)

    # fetch the URL from which to download the image.
    url = tile_image.float().getDownloadUrl({
        'scale': 10,
        'dimensions': '1000x1000',
        'format': 'GEO_TIFF'
    })

    # (connect, read) timeouts in seconds: without them a stalled connection
    # would block this pool worker forever instead of being retried.
    r = requests.get(url, timeout=(10, 300))

    # Anything but 200 OK counts as a failed attempt.  The original code raised
    # an undefined name here, which surfaced as NameError instead of the HTTP
    # status; raise the proper requests exception so the real cause is visible.
    if r.status_code != 200:
        raise requests.exceptions.HTTPError(
            f'{r.status_code}: {r.reason}', response=r
        )

    # save retrieved tile
    filename = OUT_DIR + f'/tile_{tile_id}.tif'
    with open(filename, 'wb') as out_file:
        out_file.write(r.content)

    return r.content


In [18]:
# start pool
pool = multiprocessing.Pool(40)  # earth-engine default parallel request limit: 40
print('Pool started')

try:
    # settings for data retrieval
    offset = 0
    max_chunk_size = 5000  # set to at least 1000
    num_tiles = pointsROI.size().getInfo()
    print(f'tiles: {num_tiles}')
    print(u'\u2500' * 10)

    chunk_idx = 1
    # number of chunks = ceil(num_tiles / max_chunk_size)
    num_chunks = (num_tiles // max_chunk_size) + (1 if num_tiles % max_chunk_size != 0 else 0)

    # retrieve one data chunk at a time
    while offset < num_tiles:

        t0 = time.time()

        # fetch the next (at most max_chunk_size) points as plain Python dicts
        image_points = pointsROI.toList(max_chunk_size, offset).getInfo()

        # get corresponding image tiles
        # NOTE(review): get_image_tile returns the tile bytes, so pool.map gathers
        # the bytes of the whole chunk in this process before they are dropped;
        # keep that in mind when raising max_chunk_size.
        pool.map(get_image_tile, image_points)

        # chunk completed
        t1 = time.time()
        print(f'chunk {chunk_idx} / {num_chunks} done, {offset + len(image_points)} / {num_tiles} tiles, time: {t1 - t0:.1f}s')
        chunk_idx += 1
        offset += max_chunk_size

    print(f'Download complete: {COUNTRY}')
except BaseException:
    # A failed tile (after all retries) or an interrupt: stop the workers right
    # away instead of leaving 40 processes running in the kernel.
    pool.terminate()
    raise
else:
    # normal completion: no more work will be submitted
    pool.close()
finally:
    # in both cases wait until the worker processes have exited
    pool.join()

Pool started
tiles: 23533
──────────
chunk 1 / 5 done, 5000 / 23533 tiles, time: 213.4s
chunk 2 / 5 done, 10000 / 23533 tiles, time: 197.5s
chunk 3 / 5 done, 15000 / 23533 tiles, time: 165.6s
chunk 4 / 5 done, 20000 / 23533 tiles, time: 144.8s
chunk 5 / 5 done, 23533 / 23533 tiles, time: 102.7s
Download complete: Democratic Republic of the Congo
