In [None]:
from sentinelsat import SentinelAPI
import importlib
import numpy as np

# DC Hackathon ARD Preparation Master Template

This notebook brings together all the major functions for processing (COG conversion + sen2cor) and preparing (yaml creation) S1 and S2 ARD for ingestion and use within a DC (CIAB).

Structure of the notebook is as follows:
- Objectives of preparation activities
- Arranging the S2 processing blocks
- Template S2 processing & preparation
- Template S1 processing & preparation

## Objectives of preparation activities (in priority order):
- process & upload all L2A for granules x,y,z,a in 2019 (approx 200 scenes)
- process & upload all L1C for granules x,y,z,a in last six months of 2018 (approx 240 scenes) to give an annual signature

- [ process & upload all S1 for first six months of 2018 (to give full 2018 signature) - ***nice to have*** ]
- [ process & upload all L1C for first six months of 2018 (approx another 240 scenes) to give full 2018 signature - ***nice to have*** ]

Reminders:
- Remember to amend the permissions of the bucket directories.
- Remember that this exists: https://registry.opendata.aws/sentinel-2/

## AOI for all 4 S2 granules

In [None]:
# Area of interest as a WKT polygon (lon lat pairs, closed ring).
# Alternative AOIs kept for reference:
#   main fiji test:
#     "POLYGON((176 -15,-178 -15,-178 -20,176 -20,176 -15))"
#   first granule east coast:
#     "POLYGON((178.45 -17.67,178.58 -17.67,178.58 -17.78,178.45 -17.78,178.45 -17.67))"
# Active AOI: four granules for hackathon.
wkt_aoi = (
    "POLYGON(("
    "177.52 -17.54,"
    "178.37 -17.54,"
    "178.37 -18.31,"
    "177.52 -18.31,"
    "177.52 -17.54"
    "))"
)

## S2 L2A products

It is assumed that L1C must be processed (with sen2cor) for all products acquired before 2019. This is not necessarily true - the L2A archive goes back to just before 2019 - but the start of 2019 is a nice window to aim for...


In [None]:
# Query the ESA Open Access Hub (via sentinelsat) for Sentinel-2 L2A products
# intersecting the AOI between 2019-01-01 and 2019-06-01.
import os
from getpass import getpass

# Credentials must never be stored in the notebook. They are read from the
# DHUS_USER / DHUS_PASSWORD environment variables (the names used by the
# sentinelsat command-line tool), falling back to an interactive prompt.
# NOTE(review): the password that used to be hard-coded here should be treated
# as compromised and changed.
esa_user = os.environ.get('DHUS_USER') or input('ESA hub username: ')
esa_password = os.environ.get('DHUS_PASSWORD') or getpass('ESA hub password: ')
esa_api = SentinelAPI(esa_user, esa_password)

res = esa_api.query(wkt_aoi,
                    platformname='Sentinel-2',
                    producttype='S2MSI2A',
                    date=("20190101", "20190601"))

# Convert the query result into a GeoDataFrame (one row per product).
res = esa_api.to_geodataframe(res)
print (res.shape)

In [None]:
# Summarise the L2A query result: tag each product with its tile ID, order the
# products chronologically and list what will be processed.

# The tile ID (e.g. 'T60KXF') is the 6 characters preceding the final
# '_<15-char timestamp>' suffix of the product title.
res['tileid'] = res.title.str[-22:-16]

# sort_values returns a new frame; the result must be assigned back, otherwise
# the sort has no effect on the order used by the processing loop.
res = res.sort_values(by='beginposition')

print ( np.unique(res.tileid.values) )
print ( len(res.title.values) )
print ( res.title.values )

In [None]:
import glob
import shutil
from datetime import datetime
%load_ext autoreload
%autoreload 2

from hack_scripts.hackprep_helper_functions import *

non_cog_dir = "../S2_COG_TEST/non_cog/"
cog_dir = "../S2_COG_TEST/cog/"
s3_bucket = "test.data.frontiersi.io"
s3_dir = "catapult/sentinel2/rolling_w_esa/"

n = 0

# eventual MASTER S2 L2A function requiring esa query df and working directoy
for d_id, original_scene_dir, scene_name in zip(res.uuid.values, res.filename.values, res.title):
    # print ( d_id, original_scene_dir, scene_name )
    
    n+=1
    
    #print (scene_dir)
    
    tileid = original_scene_dir.split('_')[-2]
    
    # shorten scene name - wayyyy too long...
    scene_name = scene_name[:26] + '_' + tileid
    print ( '#### SCENE {} {} of {} ####'.format(scene_name, n, res.shape[0]) )
    
    # create full path for original and cog scenes
    original_scene_dir = non_cog_dir + original_scene_dir + '/'
    scene_dir = cog_dir + scene_name + '/'
    print ( 'Target original scene dir: {}'.format(original_scene_dir) )
    print ( 'Tarket scene dir: {}'.format(scene_dir) )
    print ( 'Time: {}'.format(str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))) )
    
    download_extract_s2(d_id, non_cog_dir, original_scene_dir)
 
    print ( 'Time: {}'.format(str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))) )

    # create (or clear) cog scene directory
    if not os.path.exists(scene_dir):
        print ( 'Creating scene cog directory: {}'.format(scene_dir) )
        os.mkdir(scene_dir)
    else:
        print ( 'Scene cog directory already exists so passing: {}'.format(scene_dir) )
    
    copy_s2_metadata(original_scene_dir, scene_dir, scene_name)

    print ( 'Time: {}'.format(str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))) )
    
    conv_s2scene_cogs(original_scene_dir, scene_dir, scene_name)

    print ( 'Time: {}'.format(str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))) )

    create_yaml(scene_dir, 's2')
    
    # s3_upload_cogs(glob.glob(scene_dir + '*'), s3_bucket, s3_dir)
    
    print ( 'Time: {}'.format(str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))) )
    
    print ( 'Nuking original directory: {}'.format(original_scene_dir) )
    # shutil.rmtree(original_scene_dir)
    
    print ( 'Nuking intermediary scene cog dir: {}'.format(scene_dir) )
    
    print ( ' #### COMPLETED #### '.format(scene_name) )
    
    
    

## S2 L1C products

It is assumed that L1C must be processed (with sen2cor) for all products acquired before 2019. This is not necessarily true - the L2A archive goes back to just before 2019 - but the start of 2019 is a nice window to aim for...


In [None]:
# Query the ESA Open Access Hub (via sentinelsat) for Sentinel-2 L1C products
# intersecting the AOI between 2018-07-01 and 2019-01-01.
import os
from getpass import getpass

# Credentials must never be stored in the notebook. They are read from the
# DHUS_USER / DHUS_PASSWORD environment variables (the names used by the
# sentinelsat command-line tool), falling back to an interactive prompt.
# NOTE(review): the password that used to be hard-coded here should be treated
# as compromised and changed.
esa_user = os.environ.get('DHUS_USER') or input('ESA hub username: ')
esa_password = os.environ.get('DHUS_PASSWORD') or getpass('ESA hub password: ')
esa_api = SentinelAPI(esa_user, esa_password)

res = esa_api.query(wkt_aoi,
                    platformname='Sentinel-2',
                    producttype='S2MSI1C',
                    date=("20180701", "20190101"))

# Convert the query result into a GeoDataFrame (one row per product).
res = esa_api.to_geodataframe(res)
print (res.shape)

In [None]:
# Tag every L1C product with its tile ID, order the products newest-first and
# print an overview (distinct tiles, product count, product titles).
res['tileid'] = res['title'].str.slice(-22, -16)
res = res.sort_values('beginposition', ascending=False)

product_titles = res['title'].values
print ( np.unique(res['tileid'].values) )
print ( len(product_titles) )
print ( product_titles )

In [None]:
import glob
import shutil
from datetime import datetime
%load_ext autoreload
%autoreload 2

from hack_scripts.hackprep_helper_functions import *

non_cog_dir = "../S2_sen2cor_COG/non_cog/"
cog_dir = "../S2_sen2cor_COG/cog/"
s3_bucket = "test.data.frontiersi.io"
s3_dir = "catapult/sentinel2/rolling_w_esa/"
sen2cor = "/data/CommonSensing/Code/Sen2Cor-02.08.00-Linux64/bin/L2A_Process"

n = 0

# eventual MASTER S2 L2A function requiring esa query df and working directoy
for d_id, original_scene_dir, scene_name in zip(res.uuid.values, res.filename.values, res.title):
    # print ( d_id, original_scene_dir, scene_name )
    
    n+=1
    
    #print (scene_dir)
    
    tileid = original_scene_dir.split('_')[-2]
    
    # shorten scene name - wayyyy too long...
    scene_name = scene_name[:26] + '_' + tileid
    print ( '#### SCENE {} {} of {} ####'.format(scene_name, n, res.shape[0]) )
    
    # create full path for original and cog scenes
    original_scene_dir = non_cog_dir + original_scene_dir + '/'
    scene_dir = cog_dir + scene_name + '/'
    print ( 'Target original scene dir: {}'.format(original_scene_dir) )
    print ( 'Tarket scene dir: {}'.format(scene_dir) )
    print ( 'Time: {}'.format(str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))) )
    
    download_extract_s2(d_id, non_cog_dir, original_scene_dir)
 
    print ( 'Time: {}'.format(str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))) )

    l2a_dir = original_scene_dir.replace('_MSIL1C', '_MSIL2A')[:-39]
    
    # see if l2a prod exists to avoid running sen2cor unless needed
    l2a_dir = glob.glob(original_scene_dir.replace('_MSIL1C', '_MSIL2A')[:-39]+'*')[0]+'/'
    print ('pre', l2a_dir )
    
    if not os.path.exists(l2a_dir):
    
        sen2cor_correction(sen2cor, original_scene_dir, non_cog_dir)
        # shutil.rmtree(original_scene_dir)

        l2a_dir = glob.glob(original_scene_dir.replace('_MSIL1C', '_MSIL2A')[:-39]+'*')[0] +'/'
        print ('re-defining original to l2a: {}'.format(l2a_dir) )
    
    original_scene_dir = l2a_dir
    
    # create (or clear) cog scene directory
    if not os.path.exists(scene_dir):
        print ( 'Creating scene cog directory: {}'.format(scene_dir) )
        os.mkdir(scene_dir)
    else:
        print ( 'Scene cog directory already exists so passing: {}'.format(scene_dir) )
    
    copy_s2_metadata(original_scene_dir, scene_dir, scene_name)

    print ( 'Time: {}'.format(str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))) )
    
    conv_s2scene_cogs(original_scene_dir, scene_dir, scene_name)

    print ( 'Time: {}'.format(str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))) )

    create_yaml(scene_dir, 's2')
    
    s3_upload_cogs(glob.glob(scene_dir + '*'), s3_bucket, s3_dir)
    
    print ( 'Time: {}'.format(str(datetime.today().strftime('%Y-%m-%d %H:%M:%S'))) )
    
    print ( 'Nuking original directory: {}'.format(original_scene_dir) )
    # shutil.rmtree(original_scene_dir)
    
    print ( 'Nuking intermediary scene cog dir: {}'.format(scene_dir) )
    
    print ( ' #### COMPLETED #### '.format(scene_name) )
