# New Landsat exporter

Downloads a three-year median Landsat image for each survey location and saves it as a NumPy array.

In [1]:
import ee
import os
import requests
import multiprocessing
import pandas as pd
import numpy as np
from io import BytesIO
from pathlib import Path
from retry import retry
from preprocessing.GEE.landsat_exporter import LandsatExporter
import configparser

# Read config file
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail loudly here rather than with a confusing
# KeyError when a later cell looks up config['PATHS'].
if not config.read('config.ini'):
    raise FileNotFoundError(
        "Could not read 'config.ini' from the current working directory"
    )

# # Authenticate with Google account (uncomment to authenticate)
# ee.Authenticate()

# Initialize Google Earth Engine with the High-volume endpoint
# For more info, see https://developers.google.com/earth-engine/cloud/highvolume
ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')

*** Earth Engine *** Share your feedback by taking our Annual Developer Satisfaction Survey: https://google.qualtrics.com/jfe/form/SV_0JLhFqfSY1uiEaW?source=Init


In [2]:
# Root data folder, taken from the [PATHS] section of the config file
DATA_DIR = config['PATHS']['DATA_DIR']
# Every cluster gets its own sub-folder below this directory
DOWNLOAD_DIR = os.path.join(DATA_DIR, 'dhs_images')
# Side length (in meters) of the square frame fetched around each cluster;
# it is exported at 224 x 224 pixels, i.e. 6720 / 224 = 30 m per pixel
IMAGE_WIDTH = 6720

# Survey clusters: one row per cluster with its coordinates and survey date
dhs_csv_path = os.path.join(DATA_DIR, 'dhs_data.csv')
df = pd.read_csv(dhs_csv_path)
df.head()

Unnamed: 0,cluster_id,lon,lat,rural,region_id,country,survey,month,year,iwi
0,AO.Bengo.71.135,13.640789,-8.589805,False,AO.Bengo,Angola,Angola 2015-16 Standard DHS,11,2015,62.334459
1,AO.Bengo.71.158,14.122619,-7.718385,True,AO.Bengo,Angola,Angola 2015-16 Standard DHS,2,2016,8.226589
2,AO.Bengo.71.169,13.654425,-8.592545,False,AO.Bengo,Angola,Angola 2015-16 Standard DHS,10,2015,62.760211
3,AO.Bengo.71.203,13.517859,-8.65226,True,AO.Bengo,Angola,Angola 2015-16 Standard DHS,1,2016,68.211697
4,AO.Bengo.71.208,13.721998,-7.852511,True,AO.Bengo,Angola,Angola 2015-16 Standard DHS,11,2015,14.825944


Export images

In [11]:
@retry(tries=10, delay=1, backoff=2)
def export_row_loc(row):
    """Download the three-year median Landsat image for one survey cluster.

    The image covers an IMAGE_WIDTH x IMAGE_WIDTH meter frame around the
    cluster coordinate, is requested at 224 x 224 pixels, and is stored with
    ``np.save`` at ``DOWNLOAD_DIR/<cluster_id>/landsat.np``.

    Any exception raised here triggers the ``@retry`` decorator (up to 10
    tries with exponential backoff) before it propagates to the caller.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            'cluster_id', 'lon', 'lat', 'year' and 'month'.

    Returns:
        None. Returns early when the output file already exists or when
        Earth Engine reports an image with no bands for this location.

    Raises:
        ee.EEException: Earth Engine failed for any reason other than
            "no bands".
        requests.HTTPError: The download request did not return HTTP 200.
    """
    cluster_id = row['cluster_id']
    cluster_dir = os.path.join(DOWNLOAD_DIR, cluster_id)
    file_name = os.path.join(cluster_dir, 'landsat.np')

    # Check if sample already downloaded
    if os.path.exists(file_name):
        return

    # Ensure download directory exists
    Path(cluster_dir).mkdir(parents=True, exist_ok=True)

    loc = ee.Geometry.Point([row['lon'], row['lat']])

    # Define end_date (survey date) and start_date (three years before)
    year = int(row['year'])
    month = max(int(row['month']), 1)  # Set surveys with month 0 to month 1
    end_date = f'{year:04d}-{month:02d}-01'
    start_date = f'{year - 3:04d}-{month:02d}-01'

    loc_collection = LandsatExporter.get_collection(loc, start_date, end_date)

    # Get the median pixel values
    dwnld_img = loc_collection.median()

    # Get a IMAGE_WIDTH x IMAGE_WIDTH meter frame around cluster coordinate
    loc_bbox = loc.buffer(IMAGE_WIDTH / 2).bounds()

    try:
        # Try to get the download URL
        loc_download_url = dwnld_img.getDownloadURL({
            'name': cluster_id,
            'region': loc_bbox,
            'dimensions': [224, 224],
            'filePerBand': False,
            'format': 'NPY'
        })
    except ee.EEException as e:
        print(f'Error for row {cluster_id}: {str(e)}')
        # "No bands" is an expected outcome for some locations, so skip the
        # row instead of retrying. Every other Earth Engine error is re-raised
        # so that @retry can try again.
        if "Expression evaluates to an image with no bands" in str(e):
            return
        raise

    r = requests.get(loc_download_url)  # send get request

    if r.status_code != 200:  # anything but HTTP 200 OK: fail so @retry retries
        print(f'{r.status_code}: {r.reason}')
        raise requests.HTTPError(f'{r.status_code}: {r.reason}', response=r)

    # Load bytes as numpy array (bands as np.void)
    img = np.load(BytesIO(r.content))

    # Stack the per-band fields along a new last axis to get a plain 3D array
    # (presumably rows x cols x bands - confirm against the NPY export layout)
    img = np.stack([img[field] for field in img.dtype.names], axis=-1)

    # Write to a per-process temp file and move it into place afterwards.
    # An interrupted run then never leaves a truncated 'landsat.np' behind
    # that the "already downloaded" check above would skip forever.
    tmp_name = f'{file_name}.{os.getpid()}.tmp'
    try:
        with open(tmp_name, 'wb') as out_file:
            np.save(out_file, img)
        os.replace(tmp_name, file_name)
    finally:
        # Only still present if saving or moving failed
        if os.path.exists(tmp_name):
            os.remove(tmp_name)

Some locations evaluate to "an image with no bands". This means that not a single cloud-free image was available in the three years leading up to the survey.

In [12]:
# pool.map needs an iterable with one item per task, so turn the dataframe
# into a list of its rows (each row is a Series, indexable by column name)
clusters = [row for _, row in df.iterrows()]

n_workers = 20
# Use the pool as a context manager so the worker processes are torn down even
# when a worker raises (e.g. after export_row_loc has exhausted its retries);
# otherwise they would stay alive in the kernel. pool.map blocks until every
# cluster has been processed, so no work is cut short on a normal exit.
with multiprocessing.Pool(n_workers) as pool:
    pool.map(export_row_loc, clusters)

Error for row CF.Rs Iv.31.427: Expression evaluates to an image with no bands.
Error for row BJ.Ouémé.31.50103030: Expression evaluates to an image with no bands.
Error for row CI.South West.35.78: Expression evaluates to an image with no bands.
Error for row CM.Est.22.138: Expression evaluates to an image with no bands.
Error for row BJ.Borgou.31.31401040: Expression evaluates to an image with no bands.
Error for row BJ.Borgou.31.31405054: Expression evaluates to an image with no bands.
Error for row CI.South West.35.79: Expression evaluates to an image with no bands.
Error for row BJ.Atacora.31.10151020: Expression evaluates to an image with no bands.
Error for row BJ.Ouémé.31.50151005: Expression evaluates to an image with no bands.
Error for row CF.Rs Iv.31.428: Expression evaluates to an image with no bands.
Error for row CM.Est.22.139: Expression evaluates to an image with no bands.
Error for row CM.Yaoundé.douala.22.73: Expression evaluates to an image with no bands.
Error for r