# Downloading Imagery

There are two options for downloading imagery: tasks and local download. Tasks are better for large areas; local download is better for small ones.

In [6]:
import sys, os, inspect, logging, json, importlib
import pandas as pd
import geopandas as gpd
import rasterio

from rasterio.profiles import DefaultGTiffProfile
from gbdxtools import Interface
from gbdxtools import CatalogImage
from shapely.geometry import box

# Put the parent of the current working directory at the front of sys.path (only once),
# presumably so that the GOST_GBDx_Tools package imported next can be found.
cmd_folder = os.path.dirname(os.getcwd())
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)

from GOST_GBDx_Tools import gbdxTasks
from GOST_GBDx_Tools import gbdxURL_misc

# In order for the interface to be properly authenticated, follow the instructions here:
#   http://gbdxtools.readthedocs.io/en/latest/user_guide.html
#   For Ben, the .gbdx-config file belongs in C:\Users\WB411133
#   (recorded here because nobody else will tell you that)
gbdx = Interface()
# Bare attribute access: nothing is displayed, because this is not the last expression
# in the cell. NOTE(review): presumably kept to confirm S3 access works -- confirm, or
# move it to its own cell if the value should be shown.
gbdx.s3.info
# Project helper objects, both built on the gbdx interface
curTasks = gbdxTasks.GOSTTasks(gbdx)
gbdxUrl = gbdxURL_misc.gbdxURL(gbdx)


### Check if imagery is ordered

For catIDs that have not been delivered (location: not_delivered), remove them from the `catIDs` lists in the following cells.

In [2]:
# Catalog IDs to check, and the AOI shapefile that defines the area to extract
catIDs = ['1020010034A86700','102001002B6D3A00','10400100029E0100','103001002F8E8300']
inAOI = r"C:\temp\hti_aoi.shp"

# Derive the output locations from the AOI path with its extension stripped.
# os.path.splitext removes only the trailing extension, so an AOI that is not a
# lower-case ".shp" can no longer leave outExtents/outFolder equal to the AOI path
# itself (which would make the extents CSV overwrite the AOI file).
aoi_base = os.path.splitext(inAOI)[0]
outExtents = aoi_base + "_imageryExtents.csv"
outFolder = aoi_base

inputAOI = gpd.read_file(inAOI)
# Create the output folder if needed; exist_ok makes re-running the cell safe
os.makedirs(outFolder, exist_ok=True)

# Print the order status of every catalog ID; each entry reports the acquisition's
# 'state' and 'location'
for c in catIDs:
    print(gbdx.ordering.status(gbdx.ordering.order(c)))

[{'acquisition_id': '1020010034A86700', 'state': 'delivered', 'location': 's3://receiving-dgcs-tdgplatform-com/011135131010_01_003'}]
[{'acquisition_id': '102001002B6D3A00', 'state': 'delivered', 'location': 's3://receiving-dgcs-tdgplatform-com/011135132010_01_003'}]
[{'acquisition_id': '10400100029E0100', 'state': 'delivered', 'location': 's3://receiving-dgcs-tdgplatform-com/011164197010_01_003'}]
[{'acquisition_id': '103001002F8E8300', 'state': 'delivered', 'location': 's3://receiving-dgcs-tdgplatform-com/011164198010_01_003'}]


# Local Download

In [13]:
# Keep only the catalog IDs that exist in the catalog
catIDs = ['10400100029E0100','103001002F8E8300']

# Collect one [catID, footprint polygon] row per image
allVals = []
for c in catIDs:
    img = CatalogImage(c)
    footprint = box(*img.bounds)
    allVals.append([c, footprint])

# Tabulate the footprints and write them to the extents CSV
res = pd.DataFrame(data=allVals, columns=['catID','geometry'])
res.to_csv(outExtents)

In [26]:
# If the extents are good and the area being extracted is not too large, download results
catIDs = ['10400100029E0100','103001002F8E8300']

# Upper bound on chip size in pixels (rows * columns); this is an arbitrary size assessment
MAX_CHIP_PIXELS = 1000000000000

# The AOI is the same for every image, so build its WKT once.
# NOTE(review): only the first AOI feature is used here, while the task download
# uses inputAOI.unary_union -- confirm that this difference is intended.
first_aoi_wkt = str(inputAOI['geometry'].iloc[0])

for catID in catIDs:
    image_path = os.path.join(outFolder, "%s.tif" % catID)
    # Check for an existing download before constructing the CatalogImage for it
    if os.path.exists(image_path):
        print("Skipping %s: %s already exists" % (catID, image_path))
        continue

    tempImg = CatalogImage(catID, pansharpen=True, acomp=True, dra=True)
    chip_image = tempImg.aoi(wkt=first_aoi_wkt)
    # shape[0] feeds the band count, shape[1] the height and shape[2] the width
    n_bands, n_rows, n_cols = chip_image.shape[:3]
    if n_rows * n_cols >= MAX_CHIP_PIXELS:
        # Report the skip instead of silently producing no file
        print("Skipping %s: chip of %s x %s pixels exceeds the size limit" % (catID, n_rows, n_cols))
        continue

    # Save output image to file
    cProfile = DefaultGTiffProfile(count=n_bands, width=n_cols, height=n_rows,
                                   transform=chip_image.affine, crs={'init':'epsg:4326'})
    cProfile.update(dtype=chip_image.dtype)

    with rasterio.open(image_path, 'w', **cProfile) as out_raster:
        out_raster.write(chip_image)

    # Store the image metadata alongside the GeoTIFF
    with open(os.path.join(outFolder, "%s_metadata.json" % catID), 'w') as out:
        json.dump(tempImg.metadata, out)

# Task Download

This is for large image downloads

In [None]:
# Reload the project module so that edits made since the first import are picked up
importlib.reload(gbdxTasks)

catID = '103001002F8E8300'
outS3Folder = 'bps/HTI/Clara/%s' % catID
cImg = CatalogImage(catID)
download_task = gbdxTasks.GOSTTasks(gbdx)

# Look up the task helper's sensor entry from the image's sensor alias
sensor_alias = cImg.metadata['image']['sensorAlias']
sensor = download_task.sensorDict[sensor_alias]

# The workflow is built for the union of all AOI features
aoi_union_wkt = str(inputAOI.unary_union)
workflow_options = dict(
    downloadImages=1,
    aopPan=False,
    aopDra=False,
    aopAcomp=True,
    aopBands='Auto',
)
download_workflow = download_task.createWorkflow(catID, aoi_union_wkt, sensor, outS3Folder,
                                                 **workflow_options)

In [None]:
# Execute the workflow; the value returned by execute() is kept in workflowID
workflowID = download_workflow.execute()

In [12]:
# Run this manually to check on the status of a workflow:
#gbdxUrl.descWorkflow(workflowID)['state']
# NOTE(review): the ID below is hard-coded (presumably from an earlier session), so this
# reports on that workflow and not on workflowID -- use the line above after executing.
gbdxUrl.descWorkflow('5233893148454813538')['state']

{'state': 'running', 'event': 'started'}

In [None]:
# This section will monitor workflows as they are executing; it updates every minute
workflow_results = gbdxUrl.monitorWorkflows()

In [None]:
# Download the workflow output from S3 directly into a local folder.
# This is a new command that has not been tested before.
# The destination is defined here so that this cell does not depend on a later one.
local_folder = "C:/temp/outFolder"
os.makedirs(local_folder, exist_ok=True)
gbdx.s3.download(outS3Folder, local_folder)

In [None]:
# Once the above section is complete, run the following command to generate
#    AWS commands to both list and download S3 contents
local_folder = "C:/temp/outFolder"
# Create the destination folder if needed; exist_ok makes re-running the cell safe
os.makedirs(local_folder, exist_ok=True)
listCommands = gbdxUrl.listS3Contents(outS3Folder, recursive=True)
downloadCommands = gbdxUrl.downloadS3Contents(outS3Folder, local_folder)

In [None]:
# Print each generated list command on its own line. A plain loop avoids building
# (and displaying) a throwaway list of None values.
for command in listCommands:
    print(command)

In [None]:
# Download the S3 output into the local folder created for the AWS commands
gbdx.s3.download(outS3Folder, local_folder)

In [None]:
# It is possible to use the below command to execute the download, 
#    but I prefer to do it in the command line
# gbdxUrl.executeAWS_file(downloadCommands, "C:/temp/outFolder/aws_commands.bat")