In [110]:
import geopandas
import json
import shapely
import shapely.geometry
from shapely.ops import transform
from shapely.geometry import Point
from shapely.geometry import Polygon
from pystac_client import Client 
from collections import defaultdict
import xarray
import rasterio as rio
import rioxarray
import os
import fiona
import numpy as np
import pandas as pd
from glob import glob 
from netrc import netrc
from subprocess import Popen
from platform import system
from getpass import getpass
import os
from osgeo import gdal

In [None]:
##### START OPTIONS #####
# Year(s) of interest; the first entry is embedded in the output file names below.
yoi = [2021]
#toi = ['15STT']
# Cloud threshold and the list of months treated as valid
# (presumably percent cloud cover and calendar months -- confirm).
cloud_thres = 5
valid_months = [3,4,5,6,7,8,9]
# Base directory that every file and folder path below is derived from.
root_path = "/data/"

## file paths
spath = f"{root_path}CDL_HLS_dataframe{yoi[0]}"
image_index_file = f"{root_path}image_index{yoi[0]}"
chip_file = f"{root_path}chip_bbox.geojson"
chip_csv = f"{root_path}chip_tracker.csv"
kml_file = f"{root_path}sentinel_tile_grid.kml"
cdl_reclass_csv = f"{root_path}cdl_freq.csv"
tile_tracker_csv = f"{root_path}tile_tracker.csv"

## folder paths
chip_dir = f"{root_path}chips/"
tif_dir = f"{root_path}tif/"
chip_dir_binary = f"{root_path}chips_binary/"
chip_dir_multi = f"{root_path}chips_multi/"

chip_dir_filt = f"{root_path}chips_filtered/"
chip_dir_binary_filt = f"{root_path}chips_binary_filtered/"
chip_dir_multi_filt = f"{root_path}chips_multi_filtered/"

chip_qa_dir = f"{root_path}chips_qa/"

#####  END OPTIONS  #####

Make the output folders if they do not already exist.

In [None]:
# Create the output folders up front. exist_ok=True leaves folders that are
# already present untouched, so the cell can be re-run without a try/except
# and without printing a debug message per existing folder.
dirs_to_make = [chip_dir, chip_dir_binary, chip_qa_dir]
for folder in dirs_to_make:
    os.makedirs(folder, exist_ok=True)

Step 0: Determine the HLS tile for each chip.

In [None]:
# Read the chip bounding boxes from the path configured in the options cell
# (chip_file) rather than from a second, hard-coded location.
with open(chip_file, "r") as file:
    chips = json.load(file)

# Pull the id and the two centre coordinates out of every feature's properties.
chip_features = chips['features']
chip_ids = [item['properties']['id'] for item in chip_features]
chip_x = [item['properties']['center'][0] for item in chip_features]
chip_y = [item['properties']['center'][1] for item in chip_features]


#chip_ids = a.fea
#print(a['features'][0]['properties']['center'])

In [None]:
# chips['features'][0]['geometry']['coordinates'][0] = chips['features'][0]['geometry']['coordinates'][0][::-1]   check with Hamed

In [None]:
# Load the HLS tiles and place their coordinates into numpy arrays for processing later

# Register KML as a read/write driver with Fiona before reading the tile grid.
fiona.drvsupport.supported_drivers['KML'] = 'rw'
tile_src = geopandas.read_file(kml_file, driver='KML')

# Tile names and the x/y of each tile centroid, kept as parallel arrays.
tile_name = np.array([name for name in tile_src['Name']])
tile_centroids = [geom.centroid for geom in tile_src.geometry]
tile_x = np.array([centre.x for centre in tile_centroids])
tile_y = np.array([centre.y for centre in tile_centroids])

# Append the per-tile bounds columns to the tile table.
tile_src = pd.concat([tile_src, tile_src.bounds], axis = 1)
#del tile_src
tile_src.head(5)

In [None]:
def find_tile(x,y):
    """Return the name of the tile whose centroid is closest to (x, y).

    Closeness is the squared Euclidean distance between (x, y) and the
    module-level ``tile_x`` / ``tile_y`` arrays; the winning position is
    looked up in the parallel ``tile_name`` array.
    """
    dx = tile_x - x
    dy = tile_y - y
    nearest = np.argmin(dx**2 + dy**2)
    return tile_name[nearest]

Initialize the chip tracker CSV.

In [None]:
# One row per chip: its id, its centre coordinates and the nearest tile.
chip_columns = {"chip_id" : chip_ids, "chip_x" : chip_x, "chip_y" : chip_y}
chip_df = pd.DataFrame(chip_columns)
chip_df['tile'] = [find_tile(cx, cy) for cx, cy in zip(chip_x, chip_y)]

In [None]:
## write to csv
# Only write the tracker when it is not on disk yet, so an existing tracker is
# never overwritten by a re-run. os.path.exists checks the literal path,
# whereas glob would treat characters such as "[" or "*" as a pattern.
if os.path.exists(chip_csv):
    print('file exists')
else:
    chip_df.to_csv(chip_csv, index=False)

In [None]:
tiles = chip_df.tile.unique().tolist()
tiles

In [None]:
chip_df[chip_df.tile == '01SBU'].head(5)

In [None]:
# Drop tile 01SBU from the work list. Filtering instead of list.remove keeps
# the cell re-runnable: a second run is a no-op rather than a ValueError.
tiles = [tile for tile in tiles if tile != '01SBU']
tiles

In [None]:
# One row per tile; every tracking flag starts out as False.
tile_tracker = pd.DataFrame({"tile":tiles})
for flag_column in ('exclude', 'tif_download', 'tif_reproject', 'chip', 'filter_chips'):
    tile_tracker[flag_column] = False
# tile_tracker.head(50)

In [None]:
STAC_URL = 'https://cmr.earthdata.nasa.gov/stac'
catalog = Client.open(f'{STAC_URL}/LPCLOUD/')

In [None]:
# Query the HLS S30 collection for the first two tiles, using the first chip
# that falls in each tile as the region of interest.
# Results are stored per tile so that a later iteration no longer discards the
# items found for an earlier tile. `item_collection` still ends up holding the
# items of the last tile searched, which is what the following cells read.
tile_item_collections = {}
for current_tile in tiles[0:2]:
    ## find single chip for current tile
    chip_df_filt = chip_df.loc[chip_df.tile == current_tile]#.reset_index()
    first_chip_id = chip_df_filt.chip_id.iloc[0]
    # The position of the chip in chip_ids is also its position in the GeoJSON features.
    first_chip_index_in_json = chip_ids.index(first_chip_id)
    roi = chips['features'][first_chip_index_in_json]['geometry']
    search = catalog.search(
        collections = ['HLSS30.v2.0'],
        intersects = roi,
        datetime = '2021-03/2021-10',
    )
    num_results = search.matched()
    item_collection = search.get_all_items()
    tile_item_collections[current_tile] = item_collection
    # One summary line per tile instead of dumping the whole frame and item list.
    print(f"{current_tile}: chip {first_chip_id} (feature {first_chip_index_in_json}) -> {num_results} items")

In [112]:
list(item_collection)[0:10]

[<Item id=HLS.S30.T14SMF.2021062T172221.v2.0>,
 <Item id=HLS.S30.T14SME.2021062T172221.v2.0>,
 <Item id=HLS.S30.T14SMF.2021067T172049.v2.0>,
 <Item id=HLS.S30.T14SME.2021067T172049.v2.0>,
 <Item id=HLS.S30.T14SMF.2021077T171929.v2.0>,
 <Item id=HLS.S30.T14SME.2021077T171929.v2.0>,
 <Item id=HLS.S30.T14SMF.2021082T171951.v2.0>,
 <Item id=HLS.S30.T14SME.2021082T171951.v2.0>,
 <Item id=HLS.S30.T14SMF.2021087T171849.v2.0>,
 <Item id=HLS.S30.T14SME.2021087T171849.v2.0>]

In [None]:
len(item_collection)

In [114]:
item_collection[0].to_dict()

{'type': 'Feature',
 'stac_version': '1.0.0',
 'id': 'HLS.S30.T14SMF.2021062T172221.v2.0',
 'properties': {'eo:cloud_cover': 0,
  'datetime': '2021-03-03T17:33:45.493000Z',
  'start_datetime': '2021-03-03T17:33:45.493Z',
  'end_datetime': '2021-03-03T17:33:45.493Z'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-100.1106837, 36.0515787],
    [-98.8916315, 36.056675],
    [-98.8902407, 37.0465323],
    [-100.1249353, 37.0412498],
    [-100.1106837, 36.0515787]]]},
 'links': [{'rel': 'self',
   'href': 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD/collections/HLSS30.v2.0/items/HLS.S30.T14SMF.2021062T172221.v2.0'},
  {'rel': 'parent',
   'href': 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD/collections/HLSS30.v2.0'},
  {'rel': 'collection',
   'href': 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD/collections/HLSS30.v2.0'},
  {'rel': <RelType.ROOT: 'root'>,
   'href': 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD/',
   'type': <MediaType.JSON: 'application/json'>,
   'title': 'LPCLOUD'

In [None]:
item_collection[0].assets

In [None]:
# Take the cloud-cover limit from the options cell so there is a single place
# to change it, instead of repeating the literal here.
cloudcover = cloud_thres
spatialcover = 100
# Asset keys of the HLS S30 bands to collect links for.
s30_bands = ['B8A', 'B04','B03', 'B02', 'Fmask']

In [None]:
# Collect the links of the wanted bands, but only for HLSS30 granules whose
# cloud cover is within the limit.
# The asset loop has to sit inside both checks: at the outer loop level it ran
# for every granule, so the cloud-cover filter had no effect, and `bands`
# was undefined whenever the first granule failed the filter.
bands = s30_bands
band_links = []

for i in item_collection:
    if i.properties['eo:cloud_cover'] <= cloudcover:
        if i.collection_id == 'HLSS30.v2.0':
            #print(i.properties['eo:cloud_cover'])
            for a in i.assets:
                if a in bands:
                    band_links.append(i.assets[a].href)

In [108]:
band_links[:10]

['https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SMF.2021062T172221.v2.0/HLS.S30.T14SMF.2021062T172221.v2.0.B8A.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SMF.2021062T172221.v2.0/HLS.S30.T14SMF.2021062T172221.v2.0.B02.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SMF.2021062T172221.v2.0/HLS.S30.T14SMF.2021062T172221.v2.0.B03.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SMF.2021062T172221.v2.0/HLS.S30.T14SMF.2021062T172221.v2.0.Fmask.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SMF.2021062T172221.v2.0/HLS.S30.T14SMF.2021062T172221.v2.0.B04.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SME.2021062T172221.v2.0/HLS.S30.T14SME.2021062T172221.v2.0.B04.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS

In [None]:
tile_dicts = defaultdict(list)    

In [None]:
# Group the band links by the sixth dot-separated field counted from the end
# of each link (the tile id in the links shown above).
for link in band_links:
    tile_id = link.split('.')[-6]
    tile_dicts[tile_id].append(link)

In [None]:
    tile_dicts.keys()

In [None]:
tile_dicts['T14SMF'][:5]

In [None]:
# Convenience aliases for the link lists of the two tiles found above.
tile_links_T14SMF, tile_links_T14SME = tile_dicts['T14SMF'], tile_dicts['T14SME']

In [77]:
tile_links_T14SME[:10]

['https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SME.2021062T172221.v2.0/HLS.S30.T14SME.2021062T172221.v2.0.B04.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SME.2021062T172221.v2.0/HLS.S30.T14SME.2021062T172221.v2.0.B8A.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SME.2021062T172221.v2.0/HLS.S30.T14SME.2021062T172221.v2.0.Fmask.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SME.2021062T172221.v2.0/HLS.S30.T14SME.2021062T172221.v2.0.B02.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SME.2021062T172221.v2.0/HLS.S30.T14SME.2021062T172221.v2.0.B03.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SME.2021067T172049.v2.0/HLS.S30.T14SME.2021067T172049.v2.0.B8A.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS

In [78]:
bands_dicts = defaultdict(list)

In [85]:
# Group the T14SME links by band, i.e. the field just before the file extension.
for link in tile_links_T14SME:
    band_id = link.split('.')[-2]
    bands_dicts[band_id].append(link)

In [86]:
bands_dicts.keys()

dict_keys(['B04', 'B8A', 'Fmask', 'B02', 'B03'])

In [None]:
bands_dicts['B04']

In [82]:
# Write one text file per band listing the s3:// form of that band's links.
# The loop header had been commented out while its body stayed indented, which
# made the whole cell a syntax error; the header is restored here.
os.makedirs('./data', exist_ok=True)    # open(..., 'w') does not create missing folders
for k, v in bands_dicts.items():
    name = (f'S3_T14SME_{k}_Links.txt')
    with open(f'./data/{name}', 'w') as f:    # use ../tutorials/data/{name} as your path if running the notebook from "tutorials-template"
        for l in v:
            s3l = l.replace('https://data.lpdaac.earthdatacloud.nasa.gov/', 's3://')
            f.write(f"{s3l}" + '\n')

In [95]:
urs = 'urs.earthdata.nasa.gov'    # Earthdata URL endpoint for authentication
prompts = ['Enter NASA Earthdata Login Username: ',
           'Enter NASA Earthdata Login Password: ']

# Determine the OS (Windows machines usually use an '_netrc' file)
netrc_name = "_netrc" if system()=="Windows" else ".netrc"
netrcDir = os.path.expanduser(f"~/{netrc_name}")

# Determine if netrc file exists, and if so, if it includes NASA Earthdata Login Credentials
try:
    has_earthdata_login = netrc(netrcDir).authenticators(urs) is not None
except FileNotFoundError:
    has_earthdata_login = False

# If the file or the entry is missing, prompt for the credentials and append them.
# The file is written directly from Python rather than through `echo` in a shell:
# the credentials are never interpreted by a shell (quotes or other special
# characters in a password are safe), the three lines are written in order by a
# single call, and the same code works on Windows.
if not has_earthdata_login:
    # Create the file with owner-only permissions if it does not exist yet.
    netrc_fd = os.open(netrcDir, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o600)
    with os.fdopen(netrc_fd, 'a') as netrc_file:
        # Leading newline guards against an existing file without a trailing newline.
        netrc_file.write("\nmachine {}\nlogin {}\npassword {}\n".format(
            urs, getpass(prompt=prompts[0]), getpass(prompt=prompts[1])))
    # Set restrictive permissions (also tightens a pre-existing file)
    os.chmod(netrcDir, 0o600)

Enter NASA Earthdata Login Username:  ········
Enter NASA Earthdata Login Password:  ········


In [101]:
data_url = item_collection[0].assets['B04'].href
data_url

'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T14SMF.2021062T172221.v2.0/HLS.S30.T14SMF.2021062T172221.v2.0.B04.tif'

In [102]:
# GDAL settings for reading the remote files through rasterio: skip directory
# listing when opening a file and keep the login cookies in ~/cookies.txt.
# 'EMPTY_DIR' is the documented value for GDAL_DISABLE_READDIR_ON_OPEN and
# matches the gdal.SetConfigOption call later in this notebook.
rio_env = rio.Env(GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',
                  GDAL_HTTP_COOKIEFILE=os.path.expanduser('~/cookies.txt'),
                  GDAL_HTTP_COOKIEJAR=os.path.expanduser('~/cookies.txt'))
# The environment is entered here with no matching exit in this cell, so the
# settings stay active for the cells that follow.
rio_env.__enter__()

<rasterio.env.Env at 0x7f742d472280>

In [103]:
da = rioxarray.open_rasterio(data_url)

In [104]:
da

In [105]:
da.squeeze('band', drop=True)

In [125]:
# "~" is a shell shorthand that GDAL does not expand, so resolve the cookie
# file to an absolute path first (same location the rasterio Env above uses).
cookie_file = os.path.expanduser('~/cookies.txt')
gdal.SetConfigOption('GDAL_HTTP_COOKIEFILE', cookie_file)
gdal.SetConfigOption('GDAL_HTTP_COOKIEJAR', cookie_file)
gdal.SetConfigOption('GDAL_DISABLE_READDIR_ON_OPEN','EMPTY_DIR')
gdal.SetConfigOption('CPL_VSIL_CURL_ALLOWED_EXTENSIONS','TIF')

In [128]:
# Use vsicurl to load the data directly into memory (be patient, may take a few seconds)
# Bands are matched by name rather than by position in `bands`: the positional
# version opened B03 as `blue` and B02 as `fmask`, and never opened Fmask.
# Only the last link of each band is opened (the one the original loop ended
# up keeping), so no dataset handles are opened and then dropped.
latest_band_url = {}
for e in band_links:
    latest_band_url[e.rsplit('.', 2)[-2]] = e   # a later link replaces an earlier one

if 'B8A' in latest_band_url:    # NIR
    nir = rio.open(latest_band_url['B8A'])
if 'B04' in latest_band_url:    # red
    red = rio.open(latest_band_url['B04'])
if 'B02' in latest_band_url:    # blue
    blue = rio.open(latest_band_url['B02'])
if 'Fmask' in latest_band_url:  # quality mask
    fmask = rio.open(latest_band_url['Fmask'])
print("The COGs have been loaded into memory!")

The COGs have been loaded into memory!


In [129]:
nir.meta

{'driver': 'GTiff',
 'dtype': 'int16',
 'nodata': -9999.0,
 'width': 3660,
 'height': 3660,
 'count': 1,
 'crs': CRS.from_wkt('PROJCS["UTM Zone 14, Northern Hemisphere",GEOGCS["Unknown datum based upon the WGS 84 ellipsoid",DATUM["Not specified (based on WGS 84 spheroid)",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-99],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'),
 'transform': Affine(30.0, 0.0, 399960.0,
        0.0, -30.0, 4000020.0)}

In [138]:
originalName = nir.name.rsplit('/', 1)[-1]  # Grab the original HLS S30 granule name
originalName

'HLS.S30.T14SME.2021302T172441.v2.0.B8A.tif'

In [140]:
# outName = f"{originalName.split('.B')[0]}_B8A.tif"  # Generate output name from the original filename
# tempName = 'temp.tif'                               # Set up temp file
# outName

'HLS.S30.T14SME.2021302T172441.v2.0_B8A.tif'