In [1]:
import logging
import os


from concurrent.futures import ThreadPoolExecutor

In [2]:
import ee
import geemap
import pandas as pd

from pathlib import Path
from tqdm import tqdm

In [3]:
# Authenticate with Earth Engine before the client is initialized in the next cell.
ee.Authenticate()

True

In [4]:
# Initialize the Earth Engine client library for the 'forecasting-poverty' project.
ee.Initialize(project='forecasting-poverty')

In [5]:
# Root of the data tree: a `datasets` folder that sits beside the current working directory.
dataset_path = Path.cwd().parent.joinpath('datasets')

In [6]:
# Sub-folder of the data tree that this notebook reads from and writes to.
clean_path = dataset_path.joinpath('clean')

# Load Conglomerate

In [7]:
# Load the conglomerate table from the clean data folder.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files produced by a trusted pipeline.
df = pd.read_pickle(clean_path / 'conglomerate.pkl')

In [8]:
# Print column names, dtypes and non-null counts as a quick check of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5359 entries, 0 to 5358
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          5359 non-null   int64  
 1   month         5359 non-null   int64  
 2   conglomerate  5359 non-null   object 
 3   longitude     5359 non-null   float64
 4   latitude      5359 non-null   float64
 5   adequate      5359 non-null   bool   
dtypes: bool(1), float64(2), int64(2), object(1)
memory usage: 214.7+ KB


#  Download Conglomerate Images

In [9]:
# Route ERROR-level (and above) records to `error.log` in the working
# directory, each prefixed with a timestamp and the level name.
logging.basicConfig(
    level=logging.ERROR,
    format='%(asctime)s %(levelname)s: %(message)s',
    filename='error.log',
)

In [10]:
def download_conglomerate_image(conglome):
    """Download one Sentinel-2 RGB GeoTIFF for a conglomerate row.

    The image is taken from ``COPERNICUS/S2_SR_HARMONIZED``: scenes that
    intersect the row's point, fall within the row's calendar month and have
    ``CLOUDY_PIXEL_PERCENTAGE`` below 10 are ordered by that property, and the
    first one is kept. Bands B4/B3/B2 are selected and the image is clipped to
    the bounding box of a ``buffer(1120)`` around the point (meters by Earth
    Engine default). The result is written to
    ``clean_path / 'rgb' / '<Index>.tif'`` with the index zero-padded to four
    digits.

    Args:
        conglome: A row as produced by ``df.itertuples()``; it must expose
            ``Index``, ``longitude``, ``latitude``, ``year`` and ``month``.

    Returns:
        None. This function is best-effort: any failure (including a month
        with no qualifying scene, or a failed transfer) is logged with its
        traceback through ``logging.exception`` and is not re-raised, so a
        single bad row does not abort a batch run.
    """
    # Function-scope import keeps this cell self-contained (stdlib only).
    import subprocess

    try:
        point = ee.Geometry.Point(conglome.longitude, conglome.latitude)
        region = point.buffer(1120).bounds()

        start_date = ee.Date.fromYMD(conglome.year, conglome.month, 1)
        end_date = start_date.advance(1, 'month')

        image = (
            ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
            .filterBounds(point)
            .filterDate(start_date, end_date)
            # Drop cloudy scenes first so only qualifying scenes get sorted.
            .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10))
            .sort('CLOUDY_PIXEL_PERCENTAGE')
            .first()
            .select(['B4', 'B3', 'B2'])
            .clip(region)
        )

        params = {
            'region': region,
            'scale': 10,
            'crs': 'EPSG:4326',
            'format': 'GEO_TIFF'
        }

        download_url = image.getDownloadURL(params)

        output_filename = clean_path / 'rgb' / f'{conglome.Index:04}.tif'
        # The target folder may not exist on a fresh checkout.
        output_filename.parent.mkdir(parents=True, exist_ok=True)

        try:
            # Argument list (no shell): paths with spaces or shell
            # metacharacters are passed through verbatim, and check=True turns
            # a non-zero wget exit status into an exception that gets logged.
            subprocess.run(
                ['wget', '-q', '-O', str(output_filename), download_url],
                check=True,
            )
        except subprocess.CalledProcessError:
            # `wget -O` creates/truncates the target before transferring, so a
            # failed transfer would otherwise leave an empty or partial .tif.
            if output_filename.exists():
                output_filename.unlink()
            raise
    except Exception:
        logging.exception(f'Error downloading image {conglome.Index:04}.')

In [11]:
# Run the per-row downloads on a thread pool. Consuming the result iterator
# through tqdm advances the progress bar and blocks until every row is done.
with ThreadPoolExecutor() as executor:
    downloads = executor.map(download_conglomerate_image, df.itertuples())
    progress = tqdm(downloads, total=len(df), desc='Downloading Images')
    list(progress)

Downloading Images: 100%|██████████| 5359/5359 [09:24<00:00,  9.49it/s]
