In [None]:
import logging
import os
import urllib.request

from concurrent.futures import ThreadPoolExecutor

In [None]:
import ee
import pandas as pd

from pathlib import Path
from tqdm import tqdm

In [None]:
# Authenticate the Earth Engine client; the next cell initializes it.
ee.Authenticate()

In [None]:
# Initialize the Earth Engine client against the 'forecasting-poverty' project.
# NOTE(review): the project id is hard-coded; edit it here to run under a
# different project.
ee.Initialize(project='forecasting-poverty')

In [None]:
# `datasets` is resolved as a sibling of the current working directory, so the
# result depends on the directory the kernel was started from.
dataset_path = Path.cwd().parent / 'datasets'

In [None]:
# Directory the conglomerate table is read from; downloaded imagery is written
# to its `rgb` sub-directory.
clean_path = dataset_path / 'clean'

# Load Conglomerate

In [None]:
# Load the conglomerate table. The download step reads `longitude`, `latitude`,
# `year` and `month` from every row and names each output file after the index.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted
# source.
df = pd.read_pickle(clean_path / 'conglomerate.pkl')

In [None]:
# Summarize the loaded frame (columns, dtypes, non-null counts) before using it.
df.info()

# Download Conglomerate Images

In [None]:
# Send log records of level ERROR and above to `error.log`, timestamped.
# NOTE(review): basicConfig does nothing when the root logger already has
# handlers (some hosted notebook kernels pre-install them); confirm that
# error.log is actually created in the target environment.
log_settings = {
    'filename': 'error.log',
    'level': logging.ERROR,
    'format': '%(asctime)s %(levelname)s: %(message)s',
}
logging.basicConfig(**log_settings)

In [None]:
def download_conglomerate_image(conglome):
    """Download the least-cloudy Sentinel-2 RGB tile for one conglomerate row.

    A square region is built around the row's coordinates, the image with the
    lowest ``CLOUDY_PIXEL_PERCENTAGE`` (and below 10) taken during the row's
    year/month is selected, and its B4, B3 and B2 bands are saved as a GeoTIFF
    at ``clean_path / 'rgb' / '<Index, zero-padded to 4>.tif'``.

    Args:
        conglome: A row as yielded by ``DataFrame.itertuples()``. It must
            expose ``Index``, ``longitude``, ``latitude``, ``year`` and
            ``month``.

    Returns:
        None. Nothing is raised to the caller: any exception is logged with
        its traceback via ``logging.exception`` so that one failing row does
        not abort a batch of downloads.
    """
    try:
        point = ee.Geometry.Point(conglome.longitude, conglome.latitude)
        # Bounding box of a 1120 m buffer around the point.
        region = point.buffer(1120).bounds()

        # One-month window starting on the first day of the row's month.
        start_date = ee.Date.fromYMD(conglome.year, conglome.month, 1)
        end_date = start_date.advance(1, 'month')

        # Filter on cloudiness before sorting: the first element is the same
        # as sorting first, but fewer images have to be ordered.
        # NOTE(review): presumably, when no image passes the filters, the
        # download request below fails and the row ends up in the error log;
        # confirm against the Earth Engine behaviour.
        image = (
            ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
            .filterBounds(point)
            .filterDate(start_date, end_date)
            .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10))
            .sort('CLOUDY_PIXEL_PERCENTAGE')
            .first()
            .select(['B4', 'B3', 'B2'])
            .clip(region)
        )

        params = {
            'region': region,
            'scale': 10,
            'crs': 'EPSG:4326',
            'format': 'GEO_TIFF'
        }

        download_url = image.getDownloadURL(params)

        output_dir = clean_path / 'rgb'
        output_dir.mkdir(parents=True, exist_ok=True)
        output_filename = output_dir / f'{conglome.Index:04}.tif'

        # Fetch into a sibling ".part" file and rename only on success, so a
        # failed or interrupted transfer never leaves a truncated .tif behind.
        partial_filename = output_filename.with_name(
            output_filename.name + '.part'
        )
        try:
            urllib.request.urlretrieve(download_url, partial_filename)
            partial_filename.replace(output_filename)
        except Exception:
            if partial_filename.exists():
                partial_filename.unlink()
            raise
    except Exception:
        logging.exception(f'Error downloading image {conglome.Index:04}.')

In [None]:
# Fan the per-row downloads out over a thread pool and show a progress bar.
with ThreadPoolExecutor() as pool:
    pending = pool.map(download_conglomerate_image, df.itertuples())
    # `map` is lazy on the consumer side; draining it through tqdm advances
    # the bar as results arrive and blocks until every row has been handled.
    list(tqdm(pending, total=len(df), desc='Downloading Images'))