<a href="https://colab.research.google.com/github/1kaiser/test2023/blob/main/GoogleEarthEngine_AnySatellite_Data_Download.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the library
!pip -q install FireHR==0.1.2 pyhdf==0.10.2

In [None]:
# Authenticate to use Google Earth Engine API.
# Only authentication happens here; the download functions defined later in
# this notebook call ee.Initialize() themselves before talking to the API.
import ee
ee.Authenticate()

In [None]:
!rm -r /content/data

## **COPERNICUS/S2**

In [None]:
from pathlib import Path
# Explicit names instead of `import *`; download_data_ts is imported as well
# because it is the time-series alternative mentioned at the end of this cell.
from FireHR.data import RegionST, download_data, download_data_ts

# Bounding box coordinates (passed to RegionST as [left, bottom, right, top])
left   = 75.979728
right  = 77.866667
bottom = 31.453599
top    = 32.416667

path_save   = Path('data')
products    = ["COPERNICUS/S2"]  # Product id in google earth engine
bands       = ['B4', 'B3', 'B2'] # Red, Green, Blue

# NOTE(review): the name looks like a label left over from another example --
# the bounding box above is around 76-78 E / 31-32 N, not Berlin. Confirm
# whether anything downstream depends on the name before renaming it.
R = RegionST(name         = 'TeslaGigaBerlin', 
             bbox         = [left,bottom,right,top], 
             scale_meters = 10, 
             time_start   = '2021-03-01', 
             time_end     = '2021-04-25')

# First and last date of the region's date range
time_window = R.times[0], R.times[-1]

# Download median composite of the 3 least cloudy images within the time_window
download_data(R, time_window, products, bands, path_save, use_least_cloudy=3, show_progress=True)

# Alternative: download every image of the window instead of one composite
# download_data_ts(R, products, bands, path_save, show_progress=True)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from banet.data import open_tif

brightness = 3  # display gain applied to the scaled pixel values
# Read one raster per band of the previous download cell and stack them on the
# first axis (assumes open_tif returns an object whose .read() gives a
# band-first array -- TODO confirm).
im = np.concatenate([open_tif(f'data/download.{b}.tif').read() for b in bands])
# Move the band axis last for imshow and rescale the stored values by 1/10000
im = im.transpose(1,2,0).astype(np.float32)/10000
# imshow expects float RGB data in [0, 1]; clip explicitly after applying the
# gain instead of relying on matplotlib's implicit clipping (and its warning).
plt.imshow(np.clip(brightness*im, 0, 1))

## **GEDI**

In [None]:
from pathlib import Path
# Explicit names instead of `import *`; download_data_ts is imported as well
# because it is the time-series alternative mentioned at the end of this cell.
from FireHR.data import RegionST, download_data, download_data_ts

# Bounding box coordinates (passed to RegionST as [left, bottom, right, top])
left   = 75.979728
right  = 77.866667
bottom = 31.453599
top    = 32.416667

path_save   = Path('data')
products    = ["LARSE/GEDI/GEDI02_A_002_MONTHLY"]  # Product id in google earth engine
bands       = ['rh98', 'rh38', 'rh10'] # GEDI relative-height bands (not an RGB triplet)

# NOTE(review): the name looks like a label left over from another example --
# the bounding box above is around 76-78 E / 31-32 N, not Berlin.
R = RegionST(name         = 'TeslaGigaBerlin', 
             bbox         = [left,bottom,right,top], 
             scale_meters = 10, 
             time_start   = '2020-03-01', 
             time_end     = '2020-04-25')

# First and last date of the region's date range
time_window = R.times[0], R.times[-1]

# Download the median composite of at most 3 images within the time_window.
# NOTE(review): the n_least_cloudy helper defined later in this notebook ranks
# images by 'CLOUDY_PIXEL_PERCENTAGE'; this product presumably has no such
# property, so use_least_cloudy=3 only caps the number of images -- confirm.
download_data(R, time_window, products, bands, path_save, use_least_cloudy=3, show_progress=True)

# Alternative: download every image of the window instead of one composite
# download_data_ts(R, products, bands, path_save, show_progress=True)

## **EO1/HYPERION**

In [None]:
from pathlib import Path
# Explicit names instead of `import *`; download_data_ts is imported as well
# because it is the time-series alternative mentioned at the end of this cell.
from FireHR.data import RegionST, download_data, download_data_ts

# Bounding box coordinates (passed to RegionST as [left, bottom, right, top])
left   = 75.979728
right  = 77.866667
bottom = 31.453599
top    = 32.416667

path_save   = Path('data')
products    = ["EO1/HYPERION"]  # Product id in google earth engine
# NOTE(review): these three Hyperion channels do not look like the visible
# red/green/blue bands the original comment claimed -- confirm the intended
# wavelengths before displaying them as RGB.
bands       = ['B104', 'B140', 'B221']

# NOTE(review): the name looks like a label left over from another example --
# the bounding box above is around 76-78 E / 31-32 N, not Berlin.
R = RegionST(name         = 'TeslaGigaBerlin', 
             bbox         = [left,bottom,right,top], 
             scale_meters = 10, 
             time_start   = '2015-03-01', 
             time_end     = '2016-07-25')

# First and last date of the region's date range
time_window = R.times[0], R.times[-1]

# Download the median composite of at most 3 images within the time_window.
# NOTE(review): the n_least_cloudy helper defined later in this notebook ranks
# images by 'CLOUDY_PIXEL_PERCENTAGE'; confirm this product carries that
# property, otherwise use_least_cloudy=3 only caps the number of images.
download_data(R, time_window, products, bands, path_save, use_least_cloudy=3, show_progress=True)

# Alternative: download every image of the window instead of one composite
# download_data_ts(R, products, bands, path_save, show_progress=True)

## **JAXA/ALOS/PALSAR-2/Level2_2/ScanSAR**

In [None]:
from pathlib import Path
# Explicit names instead of `import *`; download_data_ts is imported as well
# because it is the time-series alternative mentioned at the end of this cell.
from FireHR.data import RegionST, download_data, download_data_ts

# Bounding box coordinates (passed to RegionST as [left, bottom, right, top])
left   = 75.979728
right  = 77.866667
bottom = 31.453599
top    = 32.416667

path_save   = Path('data')
products    = ["JAXA/ALOS/PALSAR-2/Level2_2/ScanSAR"]  # Product id in google earth engine
bands       = ['HH', 'HV'] # Two SAR polarisation bands (not an RGB triplet)

# NOTE(review): the name looks like a label left over from another example --
# the bounding box above is around 76-78 E / 31-32 N, not Berlin.
R = RegionST(name         = 'TeslaGigaBerlin', 
             bbox         = [left,bottom,right,top], 
             scale_meters = 10, 
             time_start   = '2020-03-01', 
             time_end     = '2020-04-25')

# First and last date of the region's date range
time_window = R.times[0], R.times[-1]

# Download the median composite of at most 3 images within the time_window.
# NOTE(review): the download_data copy defined later in this notebook looks up
# bands[0], bands[1] and bands[2]; if the installed FireHR.data.download_data
# does the same, this two-band request raises IndexError -- confirm, and use
# download_data_ts (below) if it does.
# NOTE(review): the 'CLOUDY_PIXEL_PERCENTAGE' ranking behind use_least_cloudy
# presumably does not apply to SAR data, so it only caps the image count.
download_data(R, time_window, products, bands, path_save, use_least_cloudy=3, show_progress=True)

# Alternative: download every image of the window instead of one composite
# download_data_ts(R, products, bands, path_save, show_progress=True)

## **bare code**

In [None]:
#export
import json
import os
import time
import warnings
import zipfile
from pathlib import Path

import ee
import numpy as np
import pandas as pd
import rasterio
import requests
from fastprogress.fastprogress import progress_bar
from IPython.core.debugger import set_trace

from banet.geo import open_tif, merge, Region
from banet.geo import downsample
     

#export
class RegionST(Region):
    "A named region in space and time: bounding box, pixel size and a date range."
    def __init__(self, name:str, bbox:list, pixel_size:float=None, scale_meters:int=None,
                 time_start:str=None, time_end:str=None, time_freq:str='D', time_margin:int=0,
                 shape:tuple=None, epsg=4326):
        # The pixel size is either given directly or derived from scale_meters,
        # never both at once.
        if not (scale_meters is None or pixel_size is None):
            raise Exception('Either pixel_size or scale_meters must be set to None.')
        self.name = name
        self.bbox = rasterio.coords.BoundingBox(*bbox) # left, bottom, right, top
        # scale_meters is divided by 111000 to obtain the pixel size
        # (presumably metres per degree -- confirm).
        self.pixel_size = scale_meters/111000 if pixel_size is None else pixel_size
        self.epsg         = epsg
        self.scale_meters = scale_meters
        self._shape       = shape
        # Dates go through str() before being parsed by pandas
        self.time_start   = pd.Timestamp(str(time_start))
        self.time_end     = pd.Timestamp(str(time_end))
        self.time_margin  = time_margin
        self.time_freq    = time_freq

    @property
    def shape(self):
        "Shape of the region (height, width); an explicit `shape` passed to the constructor wins."
        if self._shape is not None:
            return self._shape
        return (self.height, self.width)
        
    @property
    def times(self):
        "Date range from time_start to time_end, widened by time_margin days on both ends."
        margin = pd.Timedelta(days=self.time_margin)
        return pd.date_range(self.time_start - margin, self.time_end + margin,
                             freq=self.time_freq)

    @classmethod
    def load(cls, file, time_start=None, time_end=None):
        "Build a region from a json file; explicit time_start/time_end override the stored ones."
        with open(file, 'r') as f:
            args = json.load(f)
        start = args['time_start'] if time_start is None else time_start
        end = args['time_end'] if time_end is None else time_end
        return cls(args['name'], args['bbox'], args['pixel_size'],
                   time_start=start, time_end=end)
    
def extract_region(df_row, cls=Region):
    """Create a region object from a row of the metadata dataframe.

    `df_row` must expose `event_id`, `bbox` and `pixel_size`, plus `time_start`
    and `time_end` when `cls` is `RegionST` (or a subclass of it).

    Raises `NotImplementedError` when `cls` is neither a `Region` nor a `RegionST`.
    """
    if issubclass(cls, RegionST):
        # The dates must be passed by keyword: the fourth positional parameter
        # of RegionST is scale_meters, not time_start.
        return cls(df_row.event_id, df_row.bbox, df_row.pixel_size,
                   time_start=df_row.time_start, time_end=df_row.time_end)
    elif issubclass(cls, Region):
        return cls(df_row.event_id, df_row.bbox, df_row.pixel_size)
    # NotImplemented is a constant, not an exception class; raise the real error.
    else: raise NotImplementedError('cls must be one of the following [Region, RegionST]')
     

#export
def coords2bbox(lon, lat, pixel_size): 
    "Bounding box [left, bottom, right, top] of the coordinates, extended by one pixel on the max side."
    left, bottom = lon.min(), lat.min()
    right = lon.max() + pixel_size
    top = lat.max() + pixel_size
    return [left, bottom, right, top]

def split_region(region:RegionST, size:int, cls=Region):
    """Split `region` into tiles of at most `size` x `size` pixels.

    The longitude and latitude coordinates returned by `region.coords()` are cut
    into chunks of `size` values plus one remainder chunk per axis; one `cls`
    instance is returned for every (longitude chunk, latitude chunk) pair. For
    `RegionST` the time settings of `region` are copied to every tile.

    Raises `NotImplementedError` when `cls` is neither a `Region` nor a `RegionST`.
    """
    lon, lat = region.coords()
    Nlon = (len(lon)//size)*size
    Nlat = (len(lat)//size)*size
    lons = [*lon[:Nlon].reshape(-1, size), lon[Nlon:][None]]
    lats = [*lat[:Nlat].reshape(-1, size), lat[Nlat:][None]]
    # Drop an empty remainder chunk on each axis independently. Dropping it only
    # when both axes were empty left an empty chunk behind whenever exactly one
    # axis length was a multiple of `size`, and coords2bbox then failed on it.
    if lons[-1].size == 0: lons = lons[:-1]
    if lats[-1].size == 0: lats = lats[:-1]
    if issubclass(cls, RegionST):
        return [cls('', coords2bbox(ilon, ilat, region.pixel_size), 
                    pixel_size=region.pixel_size, time_start=region.time_start,
                    time_end=region.time_end, time_freq=region.time_freq,
                    time_margin=region.time_margin) for ilon in lons for ilat in lats]
    elif issubclass(cls, Region):
        return [cls('', coords2bbox(ilon, ilat, region.pixel_size), pixel_size=region.pixel_size) 
            for ilon in lons for ilat in lats]
    # NotImplemented is a constant, not an exception class; raise the real error.
    else: raise NotImplementedError('cls must be one of the following [Region, RegionST]')
            
def merge_tifs(files:list, fname:str, delete=False):
    "Merge `files` into a single-band, LZW-compressed raster `fname` next to the first file; optionally delete the inputs."
    sources = [open_tif(str(path)) for path in files]
    mosaic, transform = merge(sources)
    # The merged array is written as band 1, so it has to be 2-D after squeeze()
    mosaic = mosaic.squeeze()
    first_file = files[0]
    out_path = Path(first_file).parent/fname
    # The output reuses the profile of the first input, with size and transform updated
    profile = open_tif(str(first_file)).profile
    with rasterio.Env():
        n_rows, n_cols = mosaic.shape
        profile.update(width=n_cols, height=n_rows, transform=transform, compress='lzw')
        with rasterio.open(str(out_path), 'w', **profile) as dst:
            dst.write(mosaic, 1)
    if not delete:
        return
    for path in files:
        os.remove(path)
     

#export
def filter_region(image_collection:ee.ImageCollection, region:RegionST, times:tuple, bands=None):
    "Keep the images between times[0] and times[1] that intersect the region's bounding box; optionally select `bands`."
    start, end = times[0], times[1]
    bbox_geometry = ee.Geometry.Rectangle(region.bbox)
    filtered = image_collection.filterDate(start, end).filterBounds(bbox_geometry)
    if bands is None:
        return filtered
    return filtered.select(bands)

def filter_cloudy(image_collection:ee.ImageCollection, max_cloud_fraction=0.2):
    """Keep the images whose 'CLOUDY_PIXEL_PERCENTAGE' property is <= `max_cloud_fraction`.

    NOTE(review): the property name says percentage while the parameter name says
    fraction; the value is compared as given, without rescaling -- confirm which
    unit callers pass.
    """
    # filter() with ee.Filter.lte replaces the deprecated
    # filterMetadata(..., 'not_greater_than', ...) and applies the same test.
    return image_collection.filter(
        ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', max_cloud_fraction))

def n_least_cloudy(image_collection:ee.ImageCollection, n=5):
    "Return a collection with the first `n` images after sorting by 'CLOUDY_PIXEL_PERCENTAGE' (fewer, with a warning, if the collection is smaller)."
    ranked = image_collection.sort(prop='CLOUDY_PIXEL_PERCENTAGE')
    ranked_list = ranked.toList(ranked.size())
    # getInfo() brings the collection size to the client
    colsize = ranked_list.size().getInfo()
    if colsize < n: 
        warnings.warn(f'Total number of images in the collection {colsize} less than n={n}. Setting n={colsize}')
        n = colsize
    selected = []
    for position in range(n):
        selected.append(ee.Image(ranked_list.get(position)))
    return ee.ImageCollection(selected)

def download_topography_data(R:RegionST, path_save=Path('.'), scale=None, 
                             download_crop_size=1000, show_progress=False):
    """Download the 'srtm90_v4' elevation image for region `R` into `path_save`.

    Regions whose smallest side exceeds `download_crop_size` pixels are split into
    tiles; every tile is downloaded as a zip, extracted, renamed to
    `srtm90_v4.elevation_{j}.tif` and finally all tiles are merged into
    `srtm90_v4.elevation.tif`. Nothing is downloaded when that file already exists.

    `scale` defaults to `R.scale_meters`. Raises `requests.HTTPError` when a
    download request is answered with an error status.
    """
    if scale is None: scale = R.scale_meters
    ee.Initialize()
    image = ee.Image('srtm90_v4')
    path_save.mkdir(exist_ok=True, parents=True)
    sR = [R] if min(R.shape) <= download_crop_size else split_region(R, size=download_crop_size)
    if not (path_save/'srtm90_v4.elevation.tif').is_file():
        files = []
        loop = enumerate(sR) if not show_progress else progress_bar(enumerate(sR),total=len(sR))
        zip_path = path_save/'data.zip'
        # `tile` instead of `R`, so the loop no longer rebinds the parameter
        for j, tile in loop:
            region = (f"[[{tile.bbox.left}, {tile.bbox.bottom}], [{tile.bbox.right}, {tile.bbox.bottom}], " +
              f"[{tile.bbox.right}, {tile.bbox.top}], [{tile.bbox.left}, {tile.bbox.top}]]")
            # getDownloadURL is the spelling used by the other download functions
            # of this file
            url = image.getDownloadURL(
                {'scale': scale, 'crs': 'EPSG:4326', 'region': f'{region}'})
            r = requests.get(url)
            # Fail with the HTTP error itself instead of writing an error page
            # into data.zip and failing later while unzipping
            r.raise_for_status()
            with open(str(zip_path), 'wb') as f:
                f.write(r.content)
            with zipfile.ZipFile(str(zip_path), 'r') as f:
                f.extractall(str(path_save))
            # Give the extracted raster a per-tile name so the next tile does not overwrite it
            tile_file = path_save/f'srtm90_v4.elevation_{j}.tif'
            os.rename(str(path_save/'srtm90_v4.elevation.tif'), str(tile_file))
            files.append(str(tile_file))
            os.remove(str(zip_path))
        merge_tifs(files, 'srtm90_v4.elevation.tif', delete=True)

def download_data(R:RegionST, times, products, bands, path_save, scale=None, max_cloud_fraction=None,
                  use_least_cloudy=None, download_crop_size=1000, show_progress=False):
    """Download a median composite of `products` over `R`, one raster per band.

    The collections in `products` are merged, filtered to `times` and to the
    region, optionally filtered with `filter_cloudy` (`max_cloud_fraction`) and
    `n_least_cloudy` (`use_least_cloudy`), and reduced with `median()`. Regions
    whose smallest side exceeds `download_crop_size` pixels are split into tiles;
    tile files get the suffix `_{j}` and are merged with `merge_tifs` at the end.

    Nothing is downloaded when `download.{band}.tif` exists in `path_save` for
    every band in `bands`; tiles whose files all exist are skipped too.
    `scale` defaults to `R.scale_meters`.

    Raises `Exception` when a tile cannot be fetched after 10 attempts.
    """
    if scale is None: scale = R.scale_meters
    ee.Initialize()
    path_save.mkdir(exist_ok=True, parents=True)
    zip_path = path_save/'data.zip'
    max_tries, wait_seconds = 10, 10

    def _all_exist(names):
        "True when every file name in `names` exists in path_save."
        return all((path_save/name).is_file() for name in names)

    # Check every requested band. The previous hard-coded bands[0..2] lookup
    # raised IndexError for fewer than three bands and ignored any further ones.
    if not _all_exist([f'download.{b}.tif' for b in bands]):
        sR = [R] if min(R.shape) <= download_crop_size else split_region(R, size=download_crop_size, cls=RegionST)
        loop = enumerate(sR) if not show_progress else progress_bar(enumerate(sR),total=len(sR))
        for j, tile in loop:
            region = (f"[[{tile.bbox.left}, {tile.bbox.bottom}], [{tile.bbox.right}, {tile.bbox.bottom}], " +
                       f"[{tile.bbox.right}, {tile.bbox.top}], [{tile.bbox.left}, {tile.bbox.top}]]")

            # Tile already downloaded by an earlier (interrupted) run
            if _all_exist([f'download.{b}_{j}.tif' for b in bands]):
                continue

            # Merge products to single image collection
            imCol = ee.ImageCollection(products[0])
            for k in range(1, len(products)):
                imCol = imCol.merge(ee.ImageCollection(products[k]))
            imCol = filter_region(imCol, tile, times=times, bands=bands)
            if max_cloud_fraction is not None:
                imCol = filter_cloudy(imCol, max_cloud_fraction=max_cloud_fraction)
            if use_least_cloudy is not None:
                imCol = n_least_cloudy(imCol, n=use_least_cloudy)
            im = imCol.median()
            imCol = ee.ImageCollection([im])
            colList = imCol.toList(imCol.size())

            # Download each image
            for i in range(colList.size().getInfo()):
                image = ee.Image(colList.get(i))
                url, extracted, last_error = None, [], None
                for attempt in range(max_tries):
                    try:
                        url = image.getDownloadURL(
                            {'scale': scale, 'crs': 'EPSG:4326', 
                             'region': f'{region}'})
                        r = requests.get(url)
                        with open(str(zip_path), 'wb') as f:
                            f.write(r.content)
                        with zipfile.ZipFile(str(zip_path), 'r') as f:
                            extracted = f.namelist()
                            f.extractall(str(path_save))
                        os.remove(str(zip_path))
                        last_error = None
                        break
                    except Exception as e:
                        # `except Exception` lets KeyboardInterrupt through. The
                        # zip may not exist yet (e.g. the URL request failed), so
                        # remove it only when present; an unconditional remove
                        # raised inside the handler and ended the retries.
                        last_error = e
                        if zip_path.is_file(): os.remove(str(zip_path))
                        if attempt < max_tries - 1: time.sleep(wait_seconds)
                if last_error is not None:
                    raise Exception(f'Failed to process {url}') from last_error
                # Tag the extracted files with the tile index
                for name in extracted:
                    f = path_save/name
                    os.rename(str(f), str(path_save/f'{f.stem}_{j}{f.suffix}'))

        # Merge files
        # NOTE(review): Path.ls is not part of pathlib; it is presumably patched
        # onto Path by one of the imported libraries -- confirm.
        suffix = '.tif'
        files = path_save.ls(include=[suffix])
        files = [o.stem for o in files]
        # Tile files end in `_{index}`; a last '_' part shorter than 6 characters
        # is taken to be such an index.
        ref = np.unique(['_'.join(o.split('_')[:-1]) 
                         for o in files if len(o.split('_')[-1]) < 6])
        ids = np.unique([int(o.split('_')[-1]) 
                         for o in files if len(o.split('_')[-1]) < 6])
        file_groups = [[path_save/f'{r}_{i}{suffix}' for i in ids 
                    if f'{r}_{i}' in files] for r in ref] 
        for fs in file_groups:
            if not fs: continue  # nothing matched this name; fs[0] below would fail
            if len(fs) < 500:
                fsave = '_'.join(fs[0].stem.split('_')[:-1]) + suffix
                merge_tifs(fs, fsave, delete=True)
            else:
                # Merge in chunks of 500 files into intermediate `_break{n}` rasters
                fs_break = np.array(fs)[:(len(fs)//500)*500].reshape(len(fs)//500,-1).tolist()
                if len(fs[(len(fs)//500)*500:]) > 0:
                    fs_break.append(fs[(len(fs)//500)*500:])
                for fsi, fs2 in enumerate(fs_break):
                    fsave = '_'.join(fs2[0].stem.split('_')[:-1]) + f'_break{fsi}' + suffix
                    merge_tifs(fs2, fsave, delete=True)

        # Second pass: merge the intermediate `_break{n}` rasters, if any
        files = path_save.ls(include=[suffix, '_break'])
        files = [o.stem for o in files]
        ref = np.unique(['_'.join(o.split('_')[:-1]) 
                         for o in files if len(o.split('_')[-1]) < 11])
        ids = np.unique([o.split('_')[-1]
                         for o in files if len(o.split('_')[-1]) < 11])
        file_groups = [[path_save/f'{r}_{i}{suffix}' for i in ids 
                    if f'{r}_{i}' in files] for r in ref] 
        for fs in file_groups:
            if not fs: continue
            fsave = '_'.join(fs[0].stem.split('_')[:-1]) + suffix
            merge_tifs(fs, fsave, delete=True)
            
def download_data_ts(R:RegionST, products, bands, path_save, scale=None, 
                     download_crop_size=1000, show_progress=False):
    """Download every image of `products` over `R` between R.times[0] and R.times[-1].

    The collections in `products` are merged and filtered to the region, the
    dates and `bands`. Regions whose smallest side exceeds `download_crop_size`
    pixels are split into tiles; each image of each tile is downloaded as a zip,
    extracted into `path_save` and renamed with the suffix `_{j}`. Afterwards
    the tiles that share a name are merged with `merge_tifs`.
    `scale` defaults to `R.scale_meters`.

    Raises `Exception` when an image cannot be fetched after 10 attempts.
    """
    if scale is None: scale = R.scale_meters
    ee.Initialize()
    times = (R.times[0], R.times[-1])
    path_save.mkdir(exist_ok=True, parents=True)
    zip_path = path_save/'data.zip'
    max_tries, wait_seconds = 10, 10
    sR = [R] if min(R.shape) <= download_crop_size else split_region(R, size=download_crop_size, cls=RegionST)
    loop = enumerate(sR) if not show_progress else progress_bar(enumerate(sR),total=len(sR))
    for j, tile in loop:
        region = (f"[[{tile.bbox.left}, {tile.bbox.bottom}], [{tile.bbox.right}, {tile.bbox.bottom}], " + f"[{tile.bbox.right}, {tile.bbox.top}], [{tile.bbox.left}, {tile.bbox.top}]]")

        # Merge products to single image collection
        imCol = ee.ImageCollection(products[0])
        for k in range(1, len(products)):
            imCol = imCol.merge(ee.ImageCollection(products[k]))
        imCol = filter_region(imCol, tile, times=times, bands=bands)
        imCol = ee.ImageCollection(imCol)
        colList = imCol.toList(imCol.size())

        # Download each image
        for i in range(colList.size().getInfo()):
            image = ee.Image(colList.get(i))
            url, extracted, last_error = None, [], None
            for attempt in range(max_tries):
                try:
                    url = image.getDownloadURL({'scale': scale, 'crs': 'EPSG:4326','region': f'{region}'})
                    r = requests.get(url)
                    with open(str(zip_path), 'wb') as f:
                        f.write(r.content)
                    with zipfile.ZipFile(str(zip_path), 'r') as f:
                        extracted = f.namelist()
                        f.extractall(str(path_save))
                    os.remove(str(zip_path))
                    last_error = None
                    break
                except Exception as e:
                    # `except Exception` lets KeyboardInterrupt through. The zip
                    # may not exist yet (e.g. the URL request failed), so remove
                    # it only when present; an unconditional remove raised
                    # inside the handler and ended the retries.
                    last_error = e
                    if zip_path.is_file(): os.remove(str(zip_path))
                    if attempt < max_tries - 1: time.sleep(wait_seconds)
            if last_error is not None:
                raise Exception(f'Failed to process {url}') from last_error
            # Tag the extracted files with the tile index
            for name in extracted:
                f = path_save/name
                os.rename(str(f), str(path_save/f'{f.stem}_{j}{f.suffix}'))
                
    # Merge files
    # NOTE(review): Path.ls is not part of pathlib; it is presumably patched onto
    # Path by one of the imported libraries -- confirm.
    suffix = '.tif'
    files = path_save.ls(include=[suffix])
    files = [o.stem for o in files]
    # Tile files end in `_{index}`; a last '_' part shorter than 6 characters is
    # taken to be such an index.
    ref = np.unique(['_'.join(o.split('_')[:-1]) 
                     for o in files if len(o.split('_')[-1]) < 6])
    ids = np.unique([int(o.split('_')[-1]) 
                     for o in files if len(o.split('_')[-1]) < 6])
    file_groups = [[path_save/f'{r}_{i}{suffix}' for i in ids 
                if f'{r}_{i}' in files] for r in ref] 
    for fs in file_groups:
        if not fs: continue  # nothing matched this name; fs[0] below would fail
        if len(fs) < 500:
            fsave = '_'.join(fs[0].stem.split('_')[:-1]) + suffix
            merge_tifs(fs, fsave, delete=True)
        else:
            # Merge in chunks of 500 files into intermediate `_break{n}` rasters
            fs_break = np.array(fs)[:(len(fs)//500)*500].reshape(len(fs)//500,-1).tolist()
            if len(fs[(len(fs)//500)*500:]) > 0:
                fs_break.append(fs[(len(fs)//500)*500:])
            for fsi, fs2 in enumerate(fs_break):
                fsave = '_'.join(fs2[0].stem.split('_')[:-1]) + f'_break{fsi}' + suffix
                merge_tifs(fs2, fsave, delete=True)

    # Second pass: merge the intermediate `_break{n}` rasters, if any
    files = path_save.ls(include=[suffix, '_break'])
    files = [o.stem for o in files]
    ref = np.unique(['_'.join(o.split('_')[:-1]) 
                     for o in files if len(o.split('_')[-1]) < 11])
    ids = np.unique([o.split('_')[-1]
                     for o in files if len(o.split('_')[-1]) < 11])
    file_groups = [[path_save/f'{r}_{i}{suffix}' for i in ids 
                if f'{r}_{i}' in files] for r in ref] 
    for fs in file_groups:
        if not fs: continue
        fsave = '_'.join(fs[0].stem.split('_')[:-1]) + suffix
        merge_tifs(fs, fsave, delete=True)

## **fun**

In [None]:
from pathlib import Path

# Bounding box coordinates (passed to RegionST as [left, bottom, right, top])
left   = 75.979728
right  = 77.866667
bottom = 31.453599
top    = 32.416667

path_save   = Path('data')
products    = ["JAXA/ALOS/PALSAR-2/Level2_2/ScanSAR"]  # Product id in google earth engine
bands       = ['HH', 'HV'] # Two SAR polarisation bands (not an RGB triplet)

# NOTE(review): the name looks like a label left over from another example --
# the bounding box above is around 76-78 E / 31-32 N, not Berlin.
R = RegionST(name         = 'TeslaGigaBerlin', 
             bbox         = [left,bottom,right,top], 
             scale_meters = 10, 
             time_start   = '2020-05-30', 
             time_end     = '2020-06-20')

# Download every image of the region's date range (time series, no compositing)
download_data_ts(R, products, bands, path_save, show_progress=True)

## **test parallel**

In [None]:
import requests
import time
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool
from google.colab import output


###########################################################################################################################
def download_url(url):
    """Download `url` into the current directory with curl and return the elapsed seconds.

    The bearer token is read from the DOWNLOAD_BEARER_TOKEN environment variable
    instead of being stored in the notebook. The token that used to be hard-coded
    here was committed with the notebook and should be treated as leaked and
    revoked.

    Blank input is skipped (returns 0.0). A failing curl call is reported but
    does not raise. Raises `RuntimeError` when the token variable is not set.
    """
    import os
    import subprocess
    import time

    token = os.environ.get('DOWNLOAD_BEARER_TOKEN')
    if not token:
        raise RuntimeError('Set the DOWNLOAD_BEARER_TOKEN environment variable before downloading.')
    # Lines read from a text file still carry their newline
    url = url.strip()
    if not url:
        return 0.0
    started = time.time()
    # Argument list instead of a shell line: the URL is never interpreted by a shell
    completed = subprocess.run(
        ['curl', '-L', '-O', '--remote-header-name',
         '--header', f'Authorization: Bearer {token}',
         '--location', url])
    if completed.returncode != 0:
        print(f'curl exited with code {completed.returncode} for {url}')
    return time.time() - started
###########################################################################################################################
        

def download_parallel(args):
    """Run `download_url` over every item of `args` on a thread pool, printing each result.

    One thread fewer than the CPU count is used, but never fewer than one
    (ThreadPool(0) raises ValueError on a single-CPU machine).
    """
    n_workers = max(1, cpu_count() - 1)
    # The context manager shuts the pool down once all results are consumed
    with ThreadPool(n_workers) as pool:
        for result in pool.imap_unordered(download_url, args):
            print('time (s):', result)
###########################################################################################################################



# Read the URL list eagerly: the file is closed before the worker threads start,
# and the URLs are handed over without newlines or blank lines.
with open("/content/url.txt", 'r') as url_file:
    urls = [line.strip() for line in url_file if line.strip()]
###########################################################################################################################
download_parallel(urls)

In [16]:
import time
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool          
cpus = cpu_count()
def download_data_ts(R:RegionST, products, bands, path_save, scale=None, 
                     download_crop_size=1000, show_progress=False):
    """Parallel variant of `download_data_ts`: the images of each tile are fetched on a thread pool.

    NOTE: this definition replaces the sequential `download_data_ts` defined
    earlier in the notebook once the cell is run.

    The collections in `products` are merged and filtered to the region, the
    dates (R.times[0] .. R.times[-1]) and `bands`. Regions whose smallest side
    exceeds `download_crop_size` pixels are split into tiles. Every image of a
    tile is downloaded by a worker thread into its own zip file, extracted into
    `path_save` and renamed with the suffix `_{j}`; the elapsed seconds of each
    download are printed. Afterwards tiles that share a name are merged with
    `merge_tifs`. `scale` defaults to `R.scale_meters`.

    Raises `Exception` when an image cannot be fetched after 10 attempts.
    """
    if scale is None: scale = R.scale_meters
    ee.Initialize()
    times = (R.times[0], R.times[-1])
    path_save.mkdir(exist_ok=True, parents=True)
    sR = [R] if min(R.shape) <= download_crop_size else split_region(R, size=download_crop_size, cls=RegionST)
    loop = enumerate(sR) if not show_progress else progress_bar(enumerate(sR),total=len(sR))
    # ThreadPool(0) raises ValueError, so keep at least one worker on a one-CPU machine
    n_workers = max(1, cpus - 1)
    max_tries, wait_seconds = 10, 10
    for j, tile in loop:
        region = (f"[[{tile.bbox.left}, {tile.bbox.bottom}], [{tile.bbox.right}, {tile.bbox.bottom}], " + f"[{tile.bbox.right}, {tile.bbox.top}], [{tile.bbox.left}, {tile.bbox.top}]]")

        # Merge products to single image collection
        imCol = ee.ImageCollection(products[0])
        for k in range(1, len(products)):
            imCol = imCol.merge(ee.ImageCollection(products[k]))
        imCol = filter_region(imCol, tile, times=times, bands=bands)
        imCol = ee.ImageCollection(imCol)
        colList = imCol.toList(imCol.size())
        n_images = colList.size().getInfo()

        def _download_one(i, j=j, region=region, colList=colList):
            "Fetch image `i` of the current tile; return the elapsed seconds."
            started = time.time()
            image = ee.Image(colList.get(i))
            # One zip per image, so concurrent workers never share a file
            zip_path = path_save/f'data_{j}_{i}.zip'
            url, extracted, last_error = None, [], None
            for attempt in range(max_tries):
                try:
                    url = image.getDownloadURL({'scale': scale, 'crs': 'EPSG:4326','region': f'{region}'})
                    r = requests.get(url)
                    with open(str(zip_path), 'wb') as f:
                        f.write(r.content)
                    with zipfile.ZipFile(str(zip_path), 'r') as f:
                        extracted = f.namelist()
                        f.extractall(str(path_save))
                    os.remove(str(zip_path))
                    last_error = None
                    break
                except Exception as e:
                    last_error = e
                    if zip_path.is_file(): os.remove(str(zip_path))
                    if attempt < max_tries - 1: time.sleep(wait_seconds)
            if last_error is not None:
                raise Exception(f'Failed to process {url}') from last_error
            # Tag the extracted files with the tile index
            for name in extracted:
                f = path_save/name
                os.rename(str(f), str(path_save/f'{f.stem}_{j}{f.suffix}'))
            return time.time() - started

        # Download each image. The pool is given the image indices; passing the
        # bare image count (an int) to imap_unordered raised TypeError.
        with ThreadPool(n_workers) as pool:
            for result in pool.imap_unordered(_download_one, range(n_images)):
                print('time (s):', result)

                
    # Merge files
    # NOTE(review): Path.ls is not part of pathlib; it is presumably patched onto
    # Path by one of the imported libraries -- confirm.
    suffix = '.tif'
    files = path_save.ls(include=[suffix])
    files = [o.stem for o in files]
    # Tile files end in `_{index}`; a last '_' part shorter than 6 characters is
    # taken to be such an index.
    ref = np.unique(['_'.join(o.split('_')[:-1]) 
                     for o in files if len(o.split('_')[-1]) < 6])
    ids = np.unique([int(o.split('_')[-1]) 
                     for o in files if len(o.split('_')[-1]) < 6])
    file_groups = [[path_save/f'{r}_{i}{suffix}' for i in ids 
                if f'{r}_{i}' in files] for r in ref] 
    for fs in file_groups:
        if not fs: continue  # nothing matched this name; fs[0] below would fail
        if len(fs) < 500:
            fsave = '_'.join(fs[0].stem.split('_')[:-1]) + suffix
            merge_tifs(fs, fsave, delete=True)
        else:
            # Merge in chunks of 500 files into intermediate `_break{n}` rasters
            fs_break = np.array(fs)[:(len(fs)//500)*500].reshape(len(fs)//500,-1).tolist()
            if len(fs[(len(fs)//500)*500:]) > 0:
                fs_break.append(fs[(len(fs)//500)*500:])
            for fsi, fs2 in enumerate(fs_break):
                fsave = '_'.join(fs2[0].stem.split('_')[:-1]) + f'_break{fsi}' + suffix
                merge_tifs(fs2, fsave, delete=True)

    # Second pass: merge the intermediate `_break{n}` rasters, if any
    files = path_save.ls(include=[suffix, '_break'])
    files = [o.stem for o in files]
    ref = np.unique(['_'.join(o.split('_')[:-1]) 
                     for o in files if len(o.split('_')[-1]) < 11])
    ids = np.unique([o.split('_')[-1]
                     for o in files if len(o.split('_')[-1]) < 11])
    file_groups = [[path_save/f'{r}_{i}{suffix}' for i in ids 
                if f'{r}_{i}' in files] for r in ref] 
    for fs in file_groups:
        if not fs: continue
        fsave = '_'.join(fs[0].stem.split('_')[:-1]) + suffix
        merge_tifs(fs, fsave, delete=True)

In [10]:
def download_url(input, colList):
  """Download image number `input` of the Earth Engine list `colList` and return the elapsed seconds.

  `scale`, `region`, `path_save` and `j` are not parameters: they are read from
  the notebook's global namespace and must be defined before the call. The zip
  is extracted into `path_save` and every extracted file is renamed with the
  suffix `_{j}`.

  The parameter name `input` (which shadows the builtin) is kept so existing
  calls keep working.

  Raises `Exception` when the image cannot be fetched after 10 attempts.
  """
  import time
  started = time.time()
  # Index into the list that was passed in; `list.get(...)` addressed the
  # builtin `list` type and failed with AttributeError.
  image = ee.Image(colList.get(input))
  # One zip per image so that concurrent calls never share a file
  zip_path = path_save/f'data_{j}_{input}.zip'
  max_tries, wait_seconds = 10, 10
  url, extracted, last_error = None, [], None
  for attempt in range(max_tries):
      try:
          url = image.getDownloadURL(
              {'scale': scale, 'crs': 'EPSG:4326', 
                'region': f'{region}'})
          r = requests.get(url)
          with open(str(zip_path), 'wb') as f:
              f.write(r.content)
          with zipfile.ZipFile(str(zip_path), 'r') as f:
              extracted = f.namelist()
              f.extractall(str(path_save))
          os.remove(str(zip_path))
          last_error = None
          break
      except Exception as e:
          # The zip may not exist yet (e.g. the URL request failed); remove it
          # only when present so the retry loop keeps going.
          last_error = e
          if zip_path.is_file(): os.remove(str(zip_path))
          if attempt < max_tries - 1: time.sleep(wait_seconds)
  if last_error is not None:
      raise Exception(f'Failed to process {url}') from last_error
  for name in extracted:
      f = path_save/name
      os.rename(str(f), str(path_save/f'{f.stem}_{j}{f.suffix}'))
  return time.time() - started