# Google Earth Engine Batch Downloading Script

Script edited from https://github.com/KuntaHu/GEE_batch_download

Useful resources:
- https://code.earthengine.google.com
- https://github.com/google/earthengine-api
- https://github.com/google/earthengine-api/blob/master/python/examples/ipynb/authorize_notebook_server.ipynb

In [None]:
!pip install google-api-python-client
!pip install oauth2client
!pip install earthengine-api

In [None]:
!earthengine upload image -h

In [1]:
import ee

In [2]:
try:
  ee.Initialize()
  print('The Earth Engine package initialized successfully!')
except ee.EEException as e:
  print('The Earth Engine package failed to initialize!')
except:
    print("Unexpected error:", sys.exc_info()[0])
    raise

The Earth Engine package initialized successfully!


In [None]:
### ONLY if authentication failed ###
!earthengine authenticate --quiet

In [None]:
### ONLY if authentication failed ###
!earthengine authenticate --authorization-code=PLACE_AUTH_CODE_HERE

In [None]:
from IPython.display import Image

# Load a Landsat image.
img = ee.Image('LANDSAT/LT05/C01/T1_SR/LT05_034033_20000913')

print(img)
print(img.getInfo())

url = img.getThumbURL({
        'min': 0,
        'max': 3000,
        'dimensions': 500,})

print(url)
Image(url, format='png')

In [3]:
import os
import logging
import sys
import zipfile
import urllib.request as request
import numpy as np

In [4]:
def url_download(url, savePath, saveName):

    logging.basicConfig(
        format='%(asctime)s %(levelname)s %(message)s',
        level=logging.INFO,
        stream=sys.stdout)
    
    filePath = savePath + saveName

    if os.path.isfile(filePath):
        os.system("rm " + filePath)
        logging.info("Existed file deleted: " + saveName)
    else:
        logging.info("File doesn't exist.")

    file_dir = savePath
    if not os.path.exists(file_dir):
        logging.info("Make direction: " + savePath)
        os.mkdir(file_dir)

    def down(_save_path, _url):
        try:
            request.urlretrieve(_url, _save_path)
            return True
        except:
            print('\nError when retrieving the URL:\n{}'.format(_url))
            return False

    flag = down(filePath, url)
    if flag:
        print("------- Download Finished! ---------")
    else:
        print("------- Download Fail --------")

In [5]:
def un_zip(dataPath, dataName, unzipPath):
    """unzip zip file"""
    
    zip_file = zipfile.ZipFile(dataPath + dataName + ".zip")
    
    if os.path.isdir(unzipPath):
        pass
    else:
        os.mkdir(unzipPath)
    
    for names in zip_file.namelist():
        zip_file.extract(names, unzipPath)
    
    zip_file.close()

In [6]:
def batchReName(dataPath, savePath, saveName):
    
    print("dataPath: " + dataPath)
    
    for file in os.listdir(dataPath):
        format = "." + file.split(".")[-1]
        orginalName = file.split(".")[0]
        saveFileName = file.replace(orginalName, saveName)

        if format == ".tif" and ('angle' not in file):
            if os.path.isfile(savePath + saveFileName):
                os.system("rm " + savePath + saveFileName)
            os.rename(dataPath + file, savePath + saveFileName)

In [7]:
def batch_data_download_GEE(downLoadList, pathKeyWord, scale, bands4download, toRGBflag):

    savePath = rootPath + "{:}_{:}m_{:}_GEE_download_zip/".format(fireName, scale, pathKeyWord)
    tifPath = rootPath + "{:}_{:}m_{:}_Tif_collection/".format(fireName, scale, pathKeyWord)
    pngPath = rootPath + "{:}_{:}m_{:}_PNG_collection/".format(fireName, scale, pathKeyWord)
    rgbPath = rootPath + "{:}_{:}m_{:}_RGB_collection/".format(fireName, scale, pathKeyWord)
    
    print("downloadList: {}".format(downLoadList))

    for saveName in downLoadList:

        img = globals()[saveName]
        img = ee.Image(img)

        if len(bands4download) > 0:
            url = (img.select(bands4download).getDownloadUrl(
                {
                    'region': fireCoords,
                    'scale': scale,
                }
            ))
        else:
            url = (img.getDownloadUrl(
                {
                    'region': fireCoords,
                    'scale': scale,
                }
            ))

        if not os.path.exists(savePath):
            os.makedirs(savePath)

        print("{}:\n {}".format(saveName, url))
        print("savePath:", savePath)
        url_download(url=url, savePath=savePath, saveName=saveName + ".zip")

        unzipPath = savePath + "unzipedFiles/"
        un_zip(savePath, saveName, unzipPath)

        renamePath = savePath + "renamedFiles/"
        renamedName = saveName

        os.system("rd/s/q " + renamePath)

        if not os.path.exists(renamePath):
            os.makedirs(renamePath)
        batchReName(unzipPath, renamePath, renamedName)

        # ======================================================

        os.system("rd/s/q " + tifPath)  # remove directory
#         shutil.copytree(renamePath, tifPath)  # move directory

#         from snappy_tif_processing import tif2png, tif2snapTif
        from gdal_tif2rgb import tifBand2png_GDAL, bandsMerge2tif

        bandsMerge2tif(renamePath, saveName, tifPath, saveName, stretchFlag=True)

        for file in os.listdir(renamePath):
            fileName = file[:-4]
            tifBand2png_GDAL(renamePath, fileName, pngPath, fileName)
#             shutil.copy(renamePath + fileName + ".tif", tifPath)
#             tif2snapTif(renamePath, fileName, tifPath) # copy tif files

        # ## ============== delete directory =====================
        os.system("rd/s/q " + unzipPath)
        os.system("rd/s/q " + renamePath)
        os.system("rm " + savePath + saveName + ".zip")

        if toRGBflag:
            from bands2rgb import pngBand2rgb
            if satName == 'S2':
                rgbBands = ['B1','B2','B3','B4','B5','B6','B7','B8']
            elif satName == 'L8':
                rgbBands = ['B7', 'B6', 'B7']
            pngBand2rgb(pngPath, saveName, rgbPath, saveName, bands=rgbBands)

In [8]:
def set_timeEnd_newdays(img):
    group_days = img.date().format().slice(0, 10)
    return img.set('system:time_end', group_days)

# "group by" date
def group_days(imgcollection):
    imgCol_sort = imgcollection.sort("system:time_start")
    imgCol = imgCol_sort.map(set_timeEnd_newdays)
    d = imgCol.distinct(['system:time_end'])
    di = ee.ImageCollection(d)
    date_eq_filter = (ee.Filter.equals(leftField= 'system:time_end',
                                       rightField ='system:time_end'))
    saveall = ee.Join.saveAll("to_mosaic")
    j = saveall.apply(di, imgCol, date_eq_filter)
    ji = ee.ImageCollection(j)
    original_proj = ee.Image(ji.first()).select(0).projection()

    def mosaicImageBydate(img):
        mosaiced = ee.ImageCollection.fromImages(img.get('to_mosaic')).mosaic().copyProperties(img, img.propertyNames())
        return ee.Image(mosaiced)

    imgcollection_grouped = ji.map(mosaicImageBydate)

    return ee.ImageCollection(imgcollection_grouped.copyProperties(imgCol, imgCol.propertyNames()))

In [9]:
if __name__ == "__main__":
    
    ee.Initialize()
    
    paris = ee.Geometry.Rectangle([2.428976693985887,48.80552750815156,
                                   2.240835824845262,48.90604915562432])
    parisCoords = [[2.428976693985887,48.80552750815156],[2.428976693985887,48.90604915562432],
                  [2.240835824845262,48.90604915562432],[2.240835824845262,48.80552750815156]]

    # Basin Fire, ~2020-05-10
    # https://wildfiretoday.com/2020/05/11/basin-fire-burns-10000-acres-north-of-grand-canyon/
    basin = ee.Geometry.Rectangle([-112.47366492264943,34.58033492861966,
                                   -115.85196082108693,37.345294025581794])
    basinCoords = [[-112.47366492264943,34.58033492861966],[-112.47366492264943,37.345294025581794],
                   [-115.85196082108693,37.345294025581794],[-115.85196082108693,34.58033492861966]]
    
    # https://www.bbc.com/news/world-australia-50951043
    # Stirling Ranges, ~2019-12-29
    stirlingRanges = ee.Geometry.Rectangle([118.39626387050868,-34.58409156988779611,
                                   7.93895796230555,-34.23796977775188])
    stirlingRangesCoords = [[118.39626387050868,-34.584091569887796],[118.39626387050868,-34.23796977775188],
                   [117.93895796230555,-34.23796977775188],[117.93895796230555,-34.584091569887796]]
    
    # Green Range
    greenRange = ee.Geometry.Rectangle([118.53999746565552,-34.80035628523257,
                                   118.53999746565552,-34.63498692719189])
    greenRangeCoords = [[118.53999746565552,-34.80035628523257],[118.53999746565552,-34.63498692719189],
                        [118.34293020491333,-34.63498692719189],[118.34293020491333,-34.80035628523257]]
  
    fire = greenRange
    fireCoords = greenRangeCoords
    
    t1 = '2020-01-10'
    t2 = '2020-01-15'

    S2 = (ee.ImageCollection("COPERNICUS/S2")
          .filterDate(t1, t2)
          .filterBounds(fire)
          .sort('system:time_start'))

    # -----------------------------------
    satName = 'S2'
    col = globals()[satName]#.merge(L8)
    printImgIdBeforeMosaicFlag = True

    fireName = 'greenRange'
    preFix = 'MSI'
    scale = 20
    dNBR_Flag = False
    rootPath = "/home/robin/Documents/stanford/cs231n-project/download/"
    
    # ===================================
    
    if printImgIdBeforeMosaicFlag:
        print("-------------------------------------------------------")
        num0 = col.size().getInfo()
        print("Size of ImageCollection before Mosaicing: {}".format(num0))
        print("------------------- Image id List  --------------------")
        col_list = col.toList(num0)
        for idx in range(num0):
            img = ee.Image(col_list.get(idx))
            if 'S2' in satName:
                date = img.get("system:index").getInfo()[:8]
            elif 'L8' in satName:
                date = img.get("system:index").getInfo()[12:]
            else:
                date = None
                print("------------- No Matched Satellite ------------")

            varName = "{}_{}_{}".format(preFix, date, satName)
            
    print("--------------end--------------")

    # ===================================

    col_groupedBydate = group_days(col)
    num = col_groupedBydate.size().getInfo()
    
    print("-------------------------------------------------------")
    print("Size of ImageCollection after Mosaicing: {}".format(num))
    print("-------------------- Download List --------------------")
    dataList = col_groupedBydate.toList(num)

    msiDownLoadList = []
    
    for i in range(0, num):
        img = ee.Image(dataList.get(i))

        if 'S2' in satName:
            date = img.get("system:index").getInfo()[:8]
        elif 'L8' in satName:
            date = img.get("system:index").getInfo()[12:]
        else:
            date = None
            print("------------- No Matched Satellite ------------")

        varName = "{}_{}_{}".format(preFix, date, satName)
            
        globals()[varName] = img
        msiDownLoadList.append(varName)
        print("{}".format(varName))

    num = len(msiDownLoadList)
    print("Number of optical images: {}".format(num))
    print("----------------------------------------------------------")

    if satName == 'S2':
        swirBands = ['B1','B2','B3','B4','B5','B6','B7','B8']
    elif satName == 'L8':
        swirBands = ['B6', 'B7']
    else:
        swirBands = []

    batch_data_download_GEE(downLoadList=msiDownLoadList, pathKeyWord="MSI", scale=scale,
                            bands4download=swirBands, toRGBflag=True)


-------------------------------------------------------
Size of ImageCollection before Mosaicing: 2
------------------- Image id List  --------------------
--------------end--------------
-------------------------------------------------------
Size of ImageCollection after Mosaicing: 2
-------------------- Download List --------------------
MSI_20200111_S2
MSI_20200114_S2
Number of optical images: 2
----------------------------------------------------------
downloadList: ['MSI_20200111_S2', 'MSI_20200114_S2']
MSI_20200111_S2:
 https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/8d67fe1b0e923c2ef39ddaaf357c55d1-05a65e8b239d7bdaabad475489b4635c:getPixels
savePath: /home/robin/Documents/stanford/cs231n-project/download/greenRange_20m_MSI_GEE_download_zip/
2020-05-14 21:00:57,587 INFO File doesn't exist.
------- Download Finished! ---------
dataPath: /home/robin/Documents/stanford/cs231n-project/download/greenRange_20m_MSI_GEE_download_zip/unzipedFiles/
['B6',