# EO-Forge GCP Downloader

## EO-Forge Downloader

We will cover the basic steps to download scenes from:

-  Landsat 5
-  Landsat 8
-  Sentinel 2


In [1]:
import os
from eo_forge.utils.downloader import bucket_images_downloader, gcSatImg

### Configurations

Pick dates and folders to dump the images

In [2]:
# Date window used to filter the available scenes (YYYY-MM-DD strings)
INI_DATETIME_STR = "2021-08-10"
END_DATETIME_STR = "2021-09-09"
# Base folder for the products written by this notebook
BASE_PROD = "./products-int/"
# Folder that receives the raw downloaded images (passed later as `archive`);
# it is created up front, and exist_ok=True makes re-running this cell harmless
IMAGES_RAW = os.path.join(BASE_PROD, "images-raw-download")
os.makedirs(IMAGES_RAW, exist_ok=True)

## Sentinel 2 Tile

For Sentinel 2 you will need a boto config file.


### Query Images Data (on TILE)

In [3]:
# Sentinel-2 tile to download, split into the three components that form the
# bucket path (tiles/19/F/CF/)
TILE = ["19", "F", "CF"]
# Wildcard filters applied to the query results; this one keeps products whose
# name contains "_R010_" (presumably the relative orbit number - confirm)
SENTINEL2_FILTERS = ["*_R010_*"]

In [4]:
import os

In [5]:
# Build the Google Cloud image query helper for Sentinel-2
gcp_images = gcSatImg(spacecraft="S2")

# List the products available in the Google Cloud Sentinel-2 bucket for TILE
gcp_images.gcImagesCheck(TILE)

# Narrow the listing down with the name filters and the configured date window
date_window = [INI_DATETIME_STR, END_DATETIME_STR]
gcp_images.gcImagesFilt(filters=SENTINEL2_FILTERS, dates=date_window)

2022/02/09 14:29:46 - INFO - Running on spacecraft S2
2022/02/09 14:29:46 - INFO - Setting boto path to: /home/frojo/.boto
2022/02/09 14:29:46 - DEBUG - Running: gsutil ls gs://gcp-public-data-sentinel-2/tiles/19/F/CF/
2022/02/09 14:29:50 - INFO - Checking bucket: gsutil ls gs://gcp-public-data-sentinel-2/tiles/19/F/CF/
2022/02/09 14:29:50 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20210810T141741_N0301_R010_T19FCF_20210810T193059.SAFE/MTD_MSIL1C.xml /tmp/tmpxfhen18n
2022/02/09 14:29:52 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20210820T141741_N0301_R010_T19FCF_20210820T192708.SAFE/MTD_MSIL1C.xml /tmp/tmpxfhen18n
2022/02/09 14:29:55 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20210830T141741_N0301_R010_T19FCF_20210830T192632.SAFE/MTD_MSIL1C.xml /tmp/tmpxfhen18n
2022/02/09 14:29:57 - DEBUG - Copying meta with cmd: gsut

In [6]:
# Show the filtered results table (product-id, base-url, date, clouds)
gcp_images.pd_filt

Unnamed: 0,product-id,base-url,date,clouds
0,S2A_MSIL1C_20210810T141741_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-08-10,0.0764
1,S2A_MSIL1C_20210820T141741_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-08-20,8.1912
2,S2A_MSIL1C_20210830T141741_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-08-30,0.5686
3,S2A_MSIL1C_20210909T141741_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-09-09,9.2897
4,S2B_MSIL1C_20210815T141739_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-08-15,88.9605
5,S2B_MSIL1C_20210825T141739_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-08-25,80.1781
6,S2B_MSIL1C_20210904T141729_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-09-04,47.22


In [7]:
# We will download just one product: the first row of the filtered table.
# The list index ([[0]]) keeps the selection as a one-row table so that the
# "base-url" column can still be read from it below.
pd_filt = gcp_images.pd_filt.iloc[[0]]

### Download Selected Images

Prior to 20220125 L1C products included 
- MSK_CLOUDS_B00.gml

After that date:
- MSK_CLASSI_B00.jp2

We try to download both files, so one of the two downloads fails depending on the product date (this is expected behavior).


In [8]:
# Sentinel-2 bands to request for each selected product
SENTINEL2_BANDS = ["B02", "B03", "B04", "B08", "B11"]

# Google Cloud bucket downloader configured for Sentinel-2 and those bands
bid = bucket_images_downloader(spacecraft="S2", bands=SENTINEL2_BANDS)

# Bucket URLs of the selected products
product_urls = pd_filt["base-url"].to_list()

# Download into IMAGES_RAW with up to 5 simultaneous downloads
bid.execute(
    product_urls,
    archive=IMAGES_RAW,
    max_proc_thread=5,
    force_download=False,
)

2022/02/09 14:30:08 - INFO - Running on spacecraft S2
2022/02/09 14:30:08 - INFO - Requesting bands ['B02', 'B03', 'B04', 'B08', 'B11']
2022/02/09 14:30:10 - DEBUG - Queueing cmd for download: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20210810T141741_N0301_R010_T19FCF_20210810T193059.SAFE/GRANULE/L1C_T19FCF_A032039_20210810T143443/IMG_DATA/T19FCF_20210810T141741_B02.jp2 ./products-int/images-raw-download/S2A_MSIL1C_20210810T141741_N0301_R010_T19FCF_20210810T193059.SAFE/GRANULE/L1C_T19FCF_A032039_20210810T143443/IMG_DATA/
2022/02/09 14:30:10 - DEBUG - Queueing cmd for download: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20210810T141741_N0301_R010_T19FCF_20210810T193059.SAFE/GRANULE/L1C_T19FCF_A032039_20210810T143443/IMG_DATA/T19FCF_20210810T141741_B03.jp2 ./products-int/images-raw-download/S2A_MSIL1C_20210810T141741_N0301_R010_T19FCF_20210810T193059.SAFE/GRANULE/L1C_T19FCF_A032039_20210810T143443/IMG_DATA/
2022/02/09 14:30:10 - DEBUG - Qu

In [9]:
# Second date window (YYYY-MM-DD strings), placed after 2022-01-25 so that the
# selected products use the newer mask file described above
INI_DATETIME_STR_NEW = "2022-01-31"
END_DATETIME_STR_NEW = "2022-02-09"

In [10]:
# Build a fresh Google Cloud image query helper for Sentinel-2
gcp_images = gcSatImg(spacecraft="S2")

# List the products available in the Google Cloud Sentinel-2 bucket for TILE
gcp_images.gcImagesCheck(TILE)

# Narrow the listing down with the name filters and the newer date window
date_window = [INI_DATETIME_STR_NEW, END_DATETIME_STR_NEW]
gcp_images.gcImagesFilt(filters=SENTINEL2_FILTERS, dates=date_window)

2022/02/09 14:32:04 - INFO - Running on spacecraft S2
2022/02/09 14:32:04 - INFO - Setting boto path to: /home/frojo/.boto
2022/02/09 14:32:04 - DEBUG - Running: gsutil ls gs://gcp-public-data-sentinel-2/tiles/19/F/CF/
2022/02/09 14:32:07 - INFO - Checking bucket: gsutil ls gs://gcp-public-data-sentinel-2/tiles/19/F/CF/
2022/02/09 14:32:07 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20220206T141741_N0400_R010_T19FCF_20220206T192656.SAFE/MTD_MSIL1C.xml /tmp/tmpu_kbxeh1
2022/02/09 14:32:09 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2B_MSIL1C_20220201T141729_N0400_R010_T19FCF_20220201T171833.SAFE/MTD_MSIL1C.xml /tmp/tmpu_kbxeh1


In [11]:
# Show the filtered results table for the newer date window
gcp_images.pd_filt

Unnamed: 0,product-id,base-url,date,clouds
0,S2A_MSIL1C_20220206T141741_N0400_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2022-02-06,84.414345
1,S2B_MSIL1C_20220201T141729_N0400_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2022-02-01,1.156887


In [12]:
# We will download just one product: the first row of the filtered table.
# The list index ([[0]]) keeps the selection as a one-row table so that the
# "base-url" column can still be read from it below.
pd_filt = gcp_images.pd_filt.iloc[[0]]

In [13]:
# Sentinel-2 bands to request for each selected product
SENTINEL2_BANDS = ["B02", "B03", "B04", "B08", "B11"]

# Google Cloud bucket downloader configured for Sentinel-2 and those bands
bid = bucket_images_downloader(spacecraft="S2", bands=SENTINEL2_BANDS)

# Bucket URLs of the selected products
product_urls = pd_filt["base-url"].to_list()

# Download into IMAGES_RAW with up to 5 simultaneous downloads
bid.execute(
    product_urls,
    archive=IMAGES_RAW,
    max_proc_thread=5,
    force_download=False,
)

2022/02/09 14:32:12 - INFO - Running on spacecraft S2
2022/02/09 14:32:12 - INFO - Requesting bands ['B02', 'B03', 'B04', 'B08', 'B11']
2022/02/09 14:32:15 - DEBUG - Queueing cmd for download: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20220206T141741_N0400_R010_T19FCF_20220206T192656.SAFE/GRANULE/L1C_T19FCF_A034613_20220206T142947/IMG_DATA/T19FCF_20220206T141741_B02.jp2 ./products-int/images-raw-download/S2A_MSIL1C_20220206T141741_N0400_R010_T19FCF_20220206T192656.SAFE/GRANULE/L1C_T19FCF_A034613_20220206T142947/IMG_DATA/
2022/02/09 14:32:15 - DEBUG - Queueing cmd for download: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20220206T141741_N0400_R010_T19FCF_20220206T192656.SAFE/GRANULE/L1C_T19FCF_A034613_20220206T142947/IMG_DATA/T19FCF_20220206T141741_B03.jp2 ./products-int/images-raw-download/S2A_MSIL1C_20220206T141741_N0400_R010_T19FCF_20220206T192656.SAFE/GRANULE/L1C_T19FCF_A034613_20220206T142947/IMG_DATA/
2022/02/09 14:32:15 - DEBUG - Qu

## Landsat 8

### Query Images Data (on PATH/ROW)

In [14]:
# Landsat scene to download, given as [path, row]; these form the bucket path
# (LC08/01/230/094/). NOTE: this rebinds TILE, which held the Sentinel-2 tile
# in the previous section.
TILE = ["230", "094"]
# Wildcard filters applied to the query results; this one keeps products whose
# name contains "_L1TP_"
LANDSAT_FILTERS = ["*_L1TP_*"]

In [15]:
# Build the Google Cloud image query helper for Landsat 8
gcp_images = gcSatImg(spacecraft="L8")

# List the products available in the Google Cloud Landsat bucket for TILE
gcp_images.gcImagesCheck(TILE)

# Narrow the listing down with the name filters and the configured date window
date_window = [INI_DATETIME_STR, END_DATETIME_STR]
gcp_images.gcImagesFilt(filters=LANDSAT_FILTERS, dates=date_window)

2022/02/09 14:33:54 - INFO - Running on spacecraft L8
2022/02/09 14:33:54 - INFO - Setting boto path to: /home/frojo/.boto
2022/02/09 14:33:54 - DEBUG - Running: gsutil ls gs://gcp-public-data-landsat/LC08/01/230/094/
2022/02/09 14:33:56 - INFO - Checking bucket: gsutil ls gs://gcp-public-data-landsat/LC08/01/230/094/
2022/02/09 14:33:56 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-landsat/LC08/01/230/094/LC08_L1TP_230094_20210904_20210904_01_RT/LC08_L1TP_230094_20210904_20210904_01_RT_MTL.txt /tmp/tmpnccjdr00
2022/02/09 14:34:00 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-landsat/LC08/01/230/094/LC08_L1TP_230094_20210904_20210910_01_T1/LC08_L1TP_230094_20210904_20210910_01_T1_MTL.txt /tmp/tmpnccjdr00


In [16]:
# Show the filtered Landsat 8 results table
gcp_images.pd_filt

Unnamed: 0,product-id,base-url,date,clouds
0,LC08_L1TP_230094_20210904_20210904_01_RT,gs://gcp-public-data-landsat/LC08/01/230/094/L...,2021-09-04,27.26
1,LC08_L1TP_230094_20210904_20210910_01_T1,gs://gcp-public-data-landsat/LC08/01/230/094/L...,2021-09-04,27.26


In [17]:
# Keep every filtered product here: .copy() takes the whole table, so both
# rows (the _RT and _T1 products of the same 2021-09-04 date) are downloaded.
# NOTE(review): the other sections keep only the first row with .iloc[[0]] -
# confirm that downloading both products is intended.
pd_filt = gcp_images.pd_filt.copy()

### Download Selected Images

In [18]:
# Landsat 8 bands to request for each selected product
LANDSAT8_BANDS = ["B2", "B3", "B4", "B5", "B6"]

# Google Cloud bucket downloader configured for Landsat 8 and those bands
bid = bucket_images_downloader(spacecraft="L8", bands=LANDSAT8_BANDS)

# Bucket URLs of the selected products
product_urls = pd_filt["base-url"].to_list()

# Download into IMAGES_RAW with up to 5 simultaneous downloads
bid.execute(
    product_urls,
    archive=IMAGES_RAW,
    max_proc_thread=5,
    force_download=False,
)

2022/02/09 14:34:02 - INFO - Running on spacecraft L8
2022/02/09 14:34:02 - INFO - Requesting bands ['B2', 'B3', 'B4', 'B5', 'B6']
2022/02/09 14:34:02 - DEBUG - Queueing cmd for download: gsutil cp gs://gcp-public-data-landsat/LC08/01/230/094/LC08_L1TP_230094_20210904_20210904_01_RT/LC08_L1TP_230094_20210904_20210904_01_RT_B2.TIF ./products-int/images-raw-download/LC08_L1TP_230094_20210904_20210904_01_RT
2022/02/09 14:34:02 - DEBUG - Queueing cmd for download: gsutil cp gs://gcp-public-data-landsat/LC08/01/230/094/LC08_L1TP_230094_20210904_20210904_01_RT/LC08_L1TP_230094_20210904_20210904_01_RT_B3.TIF ./products-int/images-raw-download/LC08_L1TP_230094_20210904_20210904_01_RT
2022/02/09 14:34:02 - DEBUG - Queueing cmd for download: gsutil cp gs://gcp-public-data-landsat/LC08/01/230/094/LC08_L1TP_230094_20210904_20210904_01_RT/LC08_L1TP_230094_20210904_20210904_01_RT_B4.TIF ./products-int/images-raw-download/LC08_L1TP_230094_20210904_20210904_01_RT
2022/02/09 14:34:02 - DEBUG - Queueing

## Landsat 5 - Historic Archive

### Query Images Data (on PATH/ROW)

In [19]:
# Landsat scene to download, given as [path, row]; these form the bucket path
# (LT05/01/230/094/)
TILE = ["230", "094"]
# Wildcard filters applied to the query results; this one keeps products whose
# name contains "_L1TP_"
LANDSAT_FILTERS = ["*_L1TP_*"]
# Date window for the Landsat 5 archive (YYYY-MM-DD strings).
# NOTE: this overwrites INI_DATETIME_STR / END_DATETIME_STR from the
# configuration cell, so re-running an earlier query cell after this one will
# use the 2011 window unless the configuration cell is run again first.
INI_DATETIME_STR = "2011-09-10"
END_DATETIME_STR = "2011-11-10"

In [20]:
# Build the Google Cloud image query helper for Landsat 5
gcp_images = gcSatImg(spacecraft="L5")

# List the products available in the Google Cloud Landsat bucket for TILE
gcp_images.gcImagesCheck(TILE)

# Narrow the listing down with the name filters and the current date window
date_window = [INI_DATETIME_STR, END_DATETIME_STR]
gcp_images.gcImagesFilt(filters=LANDSAT_FILTERS, dates=date_window)

2022/02/09 14:36:59 - INFO - Running on spacecraft L5
2022/02/09 14:36:59 - INFO - Setting boto path to: /home/frojo/.boto
2022/02/09 14:36:59 - DEBUG - Running: gsutil ls gs://gcp-public-data-landsat/LT05/01/230/094/
2022/02/09 14:37:01 - INFO - Checking bucket: gsutil ls gs://gcp-public-data-landsat/LT05/01/230/094/
2022/02/09 14:37:01 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-landsat/LT05/01/230/094/LT05_L1TP_230094_20111011_20161005_01_T1/LT05_L1TP_230094_20111011_20161005_01_T1_MTL.txt /tmp/tmp6bru9nk2
2022/02/09 14:37:03 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-landsat/LT05/01/230/094/LT05_L1TP_230094_20111027_20161005_01_T1/LT05_L1TP_230094_20111027_20161005_01_T1_MTL.txt /tmp/tmp6bru9nk2


In [21]:
# Show the filtered Landsat 5 results table
gcp_images.pd_filt

Unnamed: 0,product-id,base-url,date,clouds
0,LT05_L1TP_230094_20111011_20161005_01_T1,gs://gcp-public-data-landsat/LT05/01/230/094/L...,2011-10-11,7.0
1,LT05_L1TP_230094_20111027_20161005_01_T1,gs://gcp-public-data-landsat/LT05/01/230/094/L...,2011-10-27,12.0


In [22]:
# We will download just one product: the first row of the filtered table.
# The list index ([[0]]) keeps the selection as a one-row table so that the
# "base-url" column can still be read from it below.
pd_filt = gcp_images.pd_filt.iloc[[0]]

### Download Selected Images

In [23]:
# Landsat 5 bands to request for each selected product
LANDSAT5_BANDS = ["B1", "B2", "B3", "B4", "B5"]

# Google Cloud bucket downloader configured for Landsat 5 and those bands
bid = bucket_images_downloader(spacecraft="L5", bands=LANDSAT5_BANDS)

# Bucket URLs of the selected products
product_urls = pd_filt["base-url"].to_list()

# Download into IMAGES_RAW with up to 5 simultaneous downloads
bid.execute(
    product_urls,
    archive=IMAGES_RAW,
    max_proc_thread=5,
    force_download=False,
)

2022/02/09 14:37:05 - INFO - Running on spacecraft L5
2022/02/09 14:37:05 - INFO - Requesting bands ['B1', 'B2', 'B3', 'B4', 'B5']
2022/02/09 14:37:05 - DEBUG - Queueing cmd for download: gsutil cp gs://gcp-public-data-landsat/LT05/01/230/094/LT05_L1TP_230094_20111011_20161005_01_T1/LT05_L1TP_230094_20111011_20161005_01_T1_B1.TIF ./products-int/images-raw-download/LT05_L1TP_230094_20111011_20161005_01_T1
2022/02/09 14:37:05 - DEBUG - Queueing cmd for download: gsutil cp gs://gcp-public-data-landsat/LT05/01/230/094/LT05_L1TP_230094_20111011_20161005_01_T1/LT05_L1TP_230094_20111011_20161005_01_T1_B2.TIF ./products-int/images-raw-download/LT05_L1TP_230094_20111011_20161005_01_T1
2022/02/09 14:37:05 - DEBUG - Queueing cmd for download: gsutil cp gs://gcp-public-data-landsat/LT05/01/230/094/LT05_L1TP_230094_20111011_20161005_01_T1/LT05_L1TP_230094_20111011_20161005_01_T1_B3.TIF ./products-int/images-raw-download/LT05_L1TP_230094_20111011_20161005_01_T1
2022/02/09 14:37:05 - DEBUG - Queueing