# OGC Demo: `kbatch`

Submitting a job (a notebook or a script) to run headlessly, either immediately or on a schedule.

In [1]:
import os
import time

import rasterio
import rio_cogeo
from s3fs import S3FileSystem

In [2]:
# dataset from NASA EarthData available on AWS S3
# (the MOD13A2.006 prefix inside the `modis-vi-nasa` bucket; listed further down)
fp = "s3://modis-vi-nasa/MOD13A2.006"

In [3]:
# Anonymous S3 filesystem handle: no credentials are supplied for the NASA bucket.
nasa_s3 = S3FileSystem(anon=True)

In [4]:
# List the top-level entries of the bucket; the notebook displays the result.
nasa_s3.ls("s3://modis-vi-nasa")

['modis-vi-nasa/MOD13A1.006',
 'modis-vi-nasa/MOD13A2.006',
 'modis-vi-nasa/MYD13A1.006',
 'modis-vi-nasa/MYD13A2.006',
 'modis-vi-nasa/validation']

In [5]:
# List every entry under the dataset prefix held in `fp`.
tff_files_s3 = nasa_s3.ls(fp)

In [6]:
# Remove the last listing entry from the list and keep it separately, so that
# `tff_files_s3` no longer contains it.
# NOTE(review): this relies on the entry to exclude being last in the listing —
# confirm the ordering returned by `ls()` before reusing on another prefix.
dir_file_s3 = tff_files_s3.pop()

In [7]:
# Display the entry that was popped off the listing.
dir_file_s3

'modis-vi-nasa/MOD13A2.006/directories.txt'

In [8]:
# Report how many entries remain in the listing and which one is now last.
print(len(tff_files_s3), tff_files_s3[-1:], sep="\n")

54
['modis-vi-nasa/MOD13A2.006/2020.05.08.tif']


In [9]:
# The file at `dir_file_s3` contains links to data from NASA's Land Processes Distributed
# Active Archive Center (LP DAAC), located at the USGS Earth Resources Observation and
# Science (EROS) Center. Downloading that data requires a NASA Earthdata Login.
 
# dir_file = "dir_files.txt"
# nasa_s3.download(dir_file_s3, dir_file)

In [10]:
# needed to read data from S3 anonymously
# (presumably picked up by rasterio/GDAL when the s3:// rasters are opened
# during validation — TODO confirm)
os.environ["AWS_NO_SIGN_REQUEST"] = "YES"

In [11]:
# Local text file that the validation results are appended to; the same name
# is reused for the uploaded copy.
log_file = "ogc-cog-validation.txt"

In [14]:
# Validate the first ten listed files as Cloud Optimized GeoTIFFs and append a
# timestamped result for each one to `log_file`.
for s3_key in tff_files_s3[:10]:
    # The listing has no scheme prefix; add it so the path is an S3 URL.
    tff = "s3://" + s3_key
    valid, errors, warnings = rio_cogeo.cog_validate(tff)
    current_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
    with open(log_file, "a") as f:
        f.write(f"{current_time} -- {tff}: ")
        if valid:
            f.write("Valid COG format\n")
        else:
            f.write("Invalid COG format\n")
            # `errors` and `warnings` are lists of messages, and file.write()
            # only accepts a str (passing a list raises TypeError), so each
            # message is written on its own line.
            f.writelines(f"  error: {msg}\n" for msg in errors)
            f.writelines(f"  warning: {msg}\n" for msg in warnings)

In [1]:
# only here for testing purposes: the literals below are placeholder values.
# Real credentials are taken from the standard AWS environment variables when
# they are set, so they never have to be written into the notebook itself.
key = os.environ.get("AWS_ACCESS_KEY_ID", "MyKeY")
secret = os.environ.get("AWS_SECRET_ACCESS_KEY", "MySeCrEt")

In [16]:
# Credentialed S3 filesystem handle used for the upload.
qs_s3 = S3FileSystem(key=key, secret=secret)
# NOTE: `fp` is rebound here. From this point on it names the destination
# bucket, not the NASA dataset prefix it held earlier in the notebook.
fp = "s3://quansight-public"

In [17]:
# Destination object path: the bucket in `fp` plus the local log file name.
log_file_s3 = f"{fp}/{log_file}"

In [19]:
# Upload the local validation log to the destination path computed above.
qs_s3.put_file(log_file, log_file_s3)