# EO-Forge GCP Downloader

## EO-Forge Downloader

We will cover the basic steps to download scenes from:

-  Landsat 5
-  Landsat 8
-  Sentinel 2


In [1]:
import os
from eo_forge.utils.downloader import bucket_images_downloader, gcSatImg

### Configurations

Pick dates and folders to dump the images

In [2]:
# Date window (YYYY-MM-DD strings) used to filter the queried images
INI_DATETIME_STR = "2021-08-10"
END_DATETIME_STR = "2021-09-09"
# Base output folder
BASE_PROD = "./products-int/"
# Folder that receives the raw downloaded images; created here if it is missing
IMAGES_RAW = os.path.join(BASE_PROD, "images-raw")
os.makedirs(IMAGES_RAW, exist_ok=True)

## Sentinel 2 Tile

__NOTE__: Have you checked the Google Cloud prerequisites in _eo-forge-gcp-downloader-0.ipynb_?

For Sentinel 2 you will need a boto config file.


### Query Images Data (on TILE)

In [3]:
# Sentinel 2 tile to query (19FCF), split into the three path components
# used in the bucket layout: tiles/19/F/CF/
TILE = ["19", "F", "CF"]
# Wildcard patterns a product name must match to be kept (here: names containing _R010_)
SENTINEL2_FILTERS = ["*_R010_*"]

In [4]:
import os

In [5]:
# Catalogue helper configured for Sentinel 2
gcp_images = gcSatImg(spacecraft="S2")
# List the products available for TILE in the Google Cloud Sentinel 2 bucket
gcp_images.gcImagesCheck(TILE)
# Keep only the products that match the name filters and fall inside the date window
date_window = [INI_DATETIME_STR, END_DATETIME_STR]
gcp_images.gcImagesFilt(filters=SENTINEL2_FILTERS, dates=date_window)

2021/12/11 11:55:02 - INFO - Running on spacecraft S2
2021/12/11 11:55:02 - INFO - Setting boto path to: /home/aperez/.boto
2021/12/11 11:55:02 - DEBUG - Running: gsutil ls gs://gcp-public-data-sentinel-2/tiles/19/F/CF/
2021/12/11 11:55:03 - INFO - Checking bucket: gsutil ls gs://gcp-public-data-sentinel-2/tiles/19/F/CF/
2021/12/11 11:55:03 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20210810T141741_N0301_R010_T19FCF_20210810T193059.SAFE/MTD_MSIL1C.xml /tmp/tmp54oh9k0r
2021/12/11 11:55:04 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20210820T141741_N0301_R010_T19FCF_20210820T192708.SAFE/MTD_MSIL1C.xml /tmp/tmp54oh9k0r
2021/12/11 11:55:05 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-sentinel-2/tiles/19/F/CF/S2A_MSIL1C_20210830T141741_N0301_R010_T19FCF_20210830T192632.SAFE/MTD_MSIL1C.xml /tmp/tmp54oh9k0r
2021/12/11 11:55:07 - DEBUG - Copying meta with cmd: gsu

In [6]:
# Show the table of filtered products (product id, base url, date, clouds)
gcp_images.pd_filt

Unnamed: 0,product-id,base-url,date,clouds
0,S2A_MSIL1C_20210810T141741_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-08-10,0.0764
1,S2A_MSIL1C_20210820T141741_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-08-20,8.1912
2,S2A_MSIL1C_20210830T141741_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-08-30,0.5686
3,S2A_MSIL1C_20210909T141741_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-09-09,9.2897
4,S2B_MSIL1C_20210815T141739_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-08-15,88.9605
5,S2B_MSIL1C_20210825T141739_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-08-25,80.1781
6,S2B_MSIL1C_20210904T141729_N0301_R010_T19FCF_2...,gs://gcp-public-data-sentinel-2/tiles/19/F/CF/...,2021-09-04,47.22


In [7]:
# We will download just one product: keep only the first row of the filtered table.
# The list indexer ([[0]]) keeps the selection as a one-row table, so the
# "base-url" column can still be read from it in the download step.
pd_filt = gcp_images.pd_filt.iloc[[0]]

### Download Selected Images

In [8]:
# Bands requested for every selected product
SENTINEL2_BANDS = ["B02", "B03", "B04", "B08", "B11"]
# Downloader for the Sentinel 2 Google Cloud bucket, restricted to those bands
bid = bucket_images_downloader(spacecraft="S2", bands=SENTINEL2_BANDS)
# Bucket locations of the selected products
selected_urls = pd_filt["base-url"].to_list()
# Download into IMAGES_RAW using up to 5 simultaneous downloads;
# force_download stays off for this run
bid.execute(selected_urls, archive=IMAGES_RAW, max_proc_thread=5, force_download=False)

2021/12/11 11:55:11 - INFO - Running on spacecraft S2
2021/12/11 11:55:11 - INFO - Requesting bands ['B02', 'B03', 'B04', 'B08', 'B11']
2021/12/11 11:55:11 - INFO - Skipping ./products-int/images-raw/S2A_MSIL1C_20210810T141741_N0301_R010_T19FCF_20210810T193059.SAFE (dir already existed).


## Landsat 8

### Query Images Data (on PATH/ROW)

In [9]:
# Landsat scene to query, given as [path, row]
TILE = ["230", "094"]
# Wildcard patterns a product name must match to be kept (here: names containing _L1TP_)
LANDSAT_FILTERS = ["*_L1TP_*"]

In [10]:
# Catalogue helper configured for Landsat 8
gcp_images = gcSatImg(spacecraft="L8")
# List the products available for this path/row in the Google Cloud Landsat bucket
gcp_images.gcImagesCheck(TILE)
# Keep only the products that match the name filters and fall inside the date window
date_window = [INI_DATETIME_STR, END_DATETIME_STR]
gcp_images.gcImagesFilt(filters=LANDSAT_FILTERS, dates=date_window)

2021/12/11 11:55:11 - INFO - Running on spacecraft L8
2021/12/11 11:55:11 - INFO - Setting boto path to: /home/aperez/.boto
2021/12/11 11:55:11 - DEBUG - Running: gsutil ls gs://gcp-public-data-landsat/LC08/01/230/094/
2021/12/11 11:55:12 - INFO - Checking bucket: gsutil ls gs://gcp-public-data-landsat/LC08/01/230/094/
2021/12/11 11:55:12 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-landsat/LC08/01/230/094/LC08_L1TP_230094_20210904_20210904_01_RT/LC08_L1TP_230094_20210904_20210904_01_RT_MTL.txt /tmp/tmphof3i5fj
2021/12/11 11:55:12 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-landsat/LC08/01/230/094/LC08_L1TP_230094_20210904_20210910_01_T1/LC08_L1TP_230094_20210904_20210910_01_T1_MTL.txt /tmp/tmphof3i5fj


In [11]:
# Show the table of filtered products (product id, base url, date, clouds)
gcp_images.pd_filt

Unnamed: 0,product-id,base-url,date,clouds
0,LC08_L1TP_230094_20210904_20210904_01_RT,gs://gcp-public-data-landsat/LC08/01/230/094/L...,2021-09-04,27.26
1,LC08_L1TP_230094_20210904_20210910_01_T1,gs://gcp-public-data-landsat/LC08/01/230/094/L...,2021-09-04,27.26


In [12]:
# Keep every filtered product: this takes a copy of the whole table (no row
# selection), so all listed products are passed to the downloader below.
# NOTE(review): the Sentinel 2 and Landsat 5 sections keep only the first row
# with .iloc[[0]] -- confirm whether downloading all rows is intended here.
pd_filt = gcp_images.pd_filt.copy()

### Download Selected Images

In [13]:
# Bands requested for every selected product
LANDSAT8_BANDS = ["B2", "B3", "B4", "B5", "B6"]
# Downloader for the Landsat 8 Google Cloud bucket, restricted to those bands
bid = bucket_images_downloader(spacecraft="L8", bands=LANDSAT8_BANDS)
# Bucket locations of the selected products
selected_urls = pd_filt["base-url"].to_list()
# Download into IMAGES_RAW using up to 5 simultaneous downloads;
# force_download stays off for this run
bid.execute(selected_urls, archive=IMAGES_RAW, max_proc_thread=5, force_download=False)

2021/12/11 11:55:13 - INFO - Running on spacecraft L8
2021/12/11 11:55:13 - INFO - Requesting bands ['B2', 'B3', 'B4', 'B5', 'B6']
2021/12/11 11:55:13 - INFO - Skipping ./products-int/images-raw/LC08_L1TP_230094_20210904_20210904_01_RT (dir already existed).
2021/12/11 11:55:13 - INFO - Skipping ./products-int/images-raw/LC08_L1TP_230094_20210904_20210910_01_T1 (dir already existed).


## Landsat 5 - Historic Archive

### Query Images Data (on PATH/ROW)

In [14]:
# Landsat scene to query, given as [path, row]
TILE = ["230", "094"]
# Wildcard patterns a product name must match to be kept (here: names containing _L1TP_)
LANDSAT_FILTERS = ["*_L1TP_*"]
# Date window for the Landsat 5 query.
# NOTE: this rebinds the INI_DATETIME_STR / END_DATETIME_STR set in the
# configuration cell; re-run that cell before re-running the Sentinel 2 or
# Landsat 8 queries, otherwise they will use this 2011 window.
INI_DATETIME_STR = "2011-09-10"
END_DATETIME_STR = "2011-11-10"

In [15]:
# Catalogue helper configured for Landsat 5
gcp_images = gcSatImg(spacecraft="L5")
# List the products available for this path/row in the Google Cloud Landsat bucket
gcp_images.gcImagesCheck(TILE)
# Keep only the products that match the name filters and fall inside the date window
date_window = [INI_DATETIME_STR, END_DATETIME_STR]
gcp_images.gcImagesFilt(filters=LANDSAT_FILTERS, dates=date_window)

2021/12/11 11:55:13 - INFO - Running on spacecraft L5
2021/12/11 11:55:13 - INFO - Setting boto path to: /home/aperez/.boto
2021/12/11 11:55:13 - DEBUG - Running: gsutil ls gs://gcp-public-data-landsat/LT05/01/230/094/
2021/12/11 11:55:14 - INFO - Checking bucket: gsutil ls gs://gcp-public-data-landsat/LT05/01/230/094/
2021/12/11 11:55:14 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-landsat/LT05/01/230/094/LT05_L1TP_230094_20111011_20161005_01_T1/LT05_L1TP_230094_20111011_20161005_01_T1_MTL.txt /tmp/tmpy7p3z8ga
2021/12/11 11:55:14 - DEBUG - Copying meta with cmd: gsutil cp gs://gcp-public-data-landsat/LT05/01/230/094/LT05_L1TP_230094_20111027_20161005_01_T1/LT05_L1TP_230094_20111027_20161005_01_T1_MTL.txt /tmp/tmpy7p3z8ga


In [16]:
# Show the table of filtered products (product id, base url, date, clouds)
gcp_images.pd_filt

Unnamed: 0,product-id,base-url,date,clouds
0,LT05_L1TP_230094_20111011_20161005_01_T1,gs://gcp-public-data-landsat/LT05/01/230/094/L...,2011-10-11,7.0
1,LT05_L1TP_230094_20111027_20161005_01_T1,gs://gcp-public-data-landsat/LT05/01/230/094/L...,2011-10-27,12.0


In [17]:
# We will download just one product: keep only the first row of the filtered table.
# The list indexer ([[0]]) keeps the selection as a one-row table, so the
# "base-url" column can still be read from it in the download step.
pd_filt = gcp_images.pd_filt.iloc[[0]]

### Download Selected Images

In [18]:
# Bands requested for every selected product
LANDSAT5_BANDS = ["B1", "B2", "B3", "B4", "B5"]
# Downloader for the Landsat 5 Google Cloud bucket, restricted to those bands
bid = bucket_images_downloader(spacecraft="L5", bands=LANDSAT5_BANDS)
# Bucket locations of the selected products
selected_urls = pd_filt["base-url"].to_list()
# Download into IMAGES_RAW using up to 5 simultaneous downloads;
# force_download stays off for this run
bid.execute(selected_urls, archive=IMAGES_RAW, max_proc_thread=5, force_download=False)

2021/12/11 11:55:15 - INFO - Running on spacecraft L5
2021/12/11 11:55:15 - INFO - Requesting bands ['B1', 'B2', 'B3', 'B4', 'B5']
2021/12/11 11:55:15 - INFO - Skipping ./products-int/images-raw/LT05_L1TP_230094_20111011_20161005_01_T1 (dir already existed).
