In [34]:
import rioxarray 
import xarray
import rasterio
import nasa_hls
import os
import geopandas
import urllib.request as urlreq
import pandas as pd
import fiona
import numpy as np
import json
import shutil
import datetime
from glob import glob
from rasterio.enums import Resampling
from rasterio import Affine
from rasterio.crs import CRS
import matplotlib.pyplot as plt
from subprocess import Popen, PIPE

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

%matplotlib inline

In [43]:
##### START OPTIONS #####
yoi = [2021]
#toi = ['15STT']
cloud_thres = 5
valid_months = [3,4,5,6,7,8,9]
root_path = "/data/"
spath = root_path + f"CDL_HLS_dataframe{yoi[0]}.csv"
image_index_file = root_path + f"image_index{yoi[0]}.csv"
hdf_dir = root_path + "hdf/"
chip_geojson_dir = root_path + 'chip_geojson/'
chip_output_dir = root_path + 'chip_output/'
chip_cdl_dir = root_path + 'chip_cdl/'
chip_file =  root_path + "chip_bbox.geojson"
ct = datetime.datetime.now()
chip_csv = root_path + "chip_csv"
kml_file = root_path + 'sentinel_tile_grid.kml'
#####  END OPTIONS  #####

determine HLS tiles

In [3]:
with open("/cdl_training_data/data/chip_bbox.geojson", "r") as file:
    chips = json.load(file)
    
chip_ids = []
chip_x = []
chip_y = []
for item in chips['features']:
    #print(item)
    chip_ids.append(item['properties']['id'])
    chip_x.append(item['properties']['center'][0])
    chip_y.append(item['properties']['center'][1])


#chip_ids = a.fea
#print(a['features'][0]['properties']['center'])

In [4]:
# Load the HLS tiles and place there coordinates into a numpy array for processing later

fiona.drvsupport.supported_drivers['KML'] = 'rw'
tile_src = geopandas.read_file(kml_file, driver='KML')
tile_name = []
tile_x = []
tile_y = []
for tile_ind in range(tile_src.shape[0]):
    tile_name.append(tile_src.iloc[tile_ind].Name)
    tile_x.append(tile_src.iloc[tile_ind].geometry.centroid.x)
    tile_y.append(tile_src.iloc[tile_ind].geometry.centroid.y)
tile_name = np.array(tile_name)
tile_x = np.array(tile_x)
tile_y = np.array(tile_y)
tile_src = pd.concat([tile_src, tile_src.bounds], axis = 1)
#del tile_src
tile_src.head(5)

Unnamed: 0,Name,Description,geometry,minx,miny,maxx,maxy
0,01CCV,TILE PROPERTIES<br><table border=0 cellpadding...,GEOMETRYCOLLECTION Z (POLYGON Z ((180.00000 -7...,-180.0,-73.064633,180.0,-72.012478
1,01CDH,TILE PROPERTIES<br><table border=0 cellpadding...,GEOMETRYCOLLECTION Z (POLYGON Z ((180.00000 -8...,-180.0,-83.835334,180.0,-82.79672
2,01CDJ,TILE PROPERTIES<br><table border=0 cellpadding...,GEOMETRYCOLLECTION Z (POLYGON Z ((180.00000 -8...,-180.0,-82.939452,180.0,-81.906947
3,01CDK,TILE PROPERTIES<br><table border=0 cellpadding...,GEOMETRYCOLLECTION Z (POLYGON Z ((180.00000 -8...,-180.0,-82.044055,180.0,-81.016439
4,01CDL,TILE PROPERTIES<br><table border=0 cellpadding...,GEOMETRYCOLLECTION Z (POLYGON Z ((180.00000 -8...,-180.0,-81.14807,180.0,-80.124456


In [5]:
def find_tile(x,y):
# Identify closest tile
    s = (tile_x - x)**2+(tile_y - y)**2
    tname = tile_name[np.argmin(s)]
    return(tname)

In [6]:
chip_df = pd.DataFrame({"chip_id" : chip_ids, "chip_x" : chip_x, "chip_y" : chip_y})

chip_df['tile'] = chip_df.apply(lambda row : find_tile(row['chip_x'], row['chip_y']), axis = 1)
chip_df
tiles = chip_df.tile.unique().tolist()
tiles

['14SMF',
 '14SME',
 '14SMD',
 '14SMC',
 '14SMB',
 '14SMA',
 '14SNC',
 '14SNB',
 '14SNA',
 '14SNE',
 '14SND',
 '14SNF',
 '14SPA',
 '14SPC',
 '14SPB',
 '14SPE',
 '14SPD',
 '14SPF',
 '01SBU',
 '14SQA',
 '14SQC',
 '14SQB',
 '14SQF',
 '14SQE',
 '14SQD',
 '15STA',
 '15STV',
 '15STU',
 '15STT',
 '15STS',
 '15STR',
 '15SUA',
 '15SUV',
 '15SUU',
 '15SUT',
 '15SUS',
 '15SUR',
 '15SVA',
 '15SVV',
 '15SVU',
 '15SVT',
 '15SVS',
 '15SVR']

what's going on with these??

In [7]:
chip_df[chip_df.tile == '01SBU'].head(5)

Unnamed: 0,chip_id,chip_x,chip_y,tile
2190,027_030,-97.067552,35.011722,01SBU
2191,027_031,-97.066718,34.951663,01SBU
2192,027_032,-97.065886,34.891601,01SBU
2193,027_033,-97.065055,34.831535,01SBU
2194,027_034,-97.064226,34.771465,01SBU


make folders if needed

In [8]:
try:
    os.makedirs(hdf_dir)
except FileExistsError:
    # directory already exists
    print('pass')
    pass


pass


query and download hdf files

In [11]:
toi = tiles[2:3]
toi

['14SMD']

In [12]:
HLSdf = nasa_hls.get_available_datasets(
        years = yoi,
        products = ["S30"],
        tiles = toi,
        return_list = False)
        
#HLSdf.to_csv(spath, mode='w')

100%|██████████| 1/1 [00:00<00:00,  1.11it/s]


In [13]:
HLSdf['month'] = pd.DatetimeIndex(HLSdf['date']).month

In [14]:
## filter by month
HLSdf = HLSdf[HLSdf.month.isin(valid_months)].reset_index(drop = True)
HLSdf

Unnamed: 0,product,tile,date,url,month
0,S30,14SMD,2021-03-03,https://hls.gsfc.nasa.gov/data/v1.4/S30/2021/1...,3
1,S30,14SMD,2021-03-05,https://hls.gsfc.nasa.gov/data/v1.4/S30/2021/1...,3
2,S30,14SMD,2021-03-08,https://hls.gsfc.nasa.gov/data/v1.4/S30/2021/1...,3
3,S30,14SMD,2021-03-10,https://hls.gsfc.nasa.gov/data/v1.4/S30/2021/1...,3
4,S30,14SMD,2021-03-13,https://hls.gsfc.nasa.gov/data/v1.4/S30/2021/1...,3
5,S30,14SMD,2021-03-15,https://hls.gsfc.nasa.gov/data/v1.4/S30/2021/1...,3
6,S30,14SMD,2021-03-18,https://hls.gsfc.nasa.gov/data/v1.4/S30/2021/1...,3
7,S30,14SMD,2021-03-20,https://hls.gsfc.nasa.gov/data/v1.4/S30/2021/1...,3
8,S30,14SMD,2021-03-23,https://hls.gsfc.nasa.gov/data/v1.4/S30/2021/1...,3
9,S30,14SMD,2021-03-25,https://hls.gsfc.nasa.gov/data/v1.4/S30/2021/1...,3


In [15]:
## download hdf
for k in range(len(HLSdf)):
    url = HLSdf.at[k, "url"]
    local_name = url.split('/')[-1].replace("\n", "").replace('.hdf', '')
    HLSdf.at[k, "image_id"] = local_name
    try:
        urlreq.urlretrieve(url, filename = hdf_dir+local_name + '.hdf')
    except:
        print(local_name + " failed")
        continue
               
HLSdf.to_csv(spath, mode='w')

extract hdf metadata, filter to 3 scenes per tile

In [16]:
def get_metadata_from_hdf_mine(src, fields=["cloud_cover", "spatial_coverage"]):
    """Get metadata from a nasa-hls hdf file. See HLS user guide for valid fields.
    
    HLS User Guide - see Section 6.6: 
    
    https://hls.gsfc.nasa.gov/wp-content/uploads/2019/01/HLS.v1.4.UserGuide_draft_ver3.1.pdf
    """
    band="QA"
    cmd = f'gdalinfo HDF4_EOS:EOS_GRID:"{src}":Grid:{band}'
#    print(cmd)
    p = Popen(cmd, stdout=PIPE, shell=True)
    output, err = p.communicate()
    output = str(output)[2:-1].replace("\\n", "\n")
    rc = p.returncode
    metadata = {}
    for line in output.split("\n"):
        for field in fields:
            if field in line:
                metadata[field] = line.split("=")[1].strip()
                try:
                    metadata[field] = float(metadata[field])
                except:
                    pass
    for field in fields:
        if field not in metadata.keys():
            warnings.warn(f"Could not find metadata for field '{field}'.")
    return metadata

In [17]:
image_index = pd.DataFrame(columns = ['image_id', 'tile', 'date', 'month', 'cloud_coverage', 'spatial_coverage'])

candidate_hdf = sorted(glob(hdf_dir + '*.hdf'))

for img in candidate_hdf:
   # print(img)
    local_name = img.split('/')[-1]
    try:
      #  print(hdf_dir+local_name)
        md = get_metadata_from_hdf_mine(hdf_dir+local_name)
    except:
        print(img + ' skipped')
        continue
   # print(md)
    cloud_cover = int(md['cloud_cover'])
    spatial_coverage = int(md['spatial_coverage'])
    image_id = local_name.replace('.hdf', '')
    tname = local_name.split('.')[2]
    date = local_name.split('.')[3]
    image_date_string = image_id.split('.')[3]
    image_date = pd.to_datetime(image_date_string, format="%Y%j").date()
    image_month = image_date.month
    
    new_row = pd.DataFrame({'image_id':  [image_id],
               'tile': [tname],
               'date': [image_date],
               'month': [image_month],
               'cloud_coverage': [cloud_cover],
               'spatial_coverage': [spatial_coverage]})
    image_index = pd.concat([image_index, new_row], ignore_index = True)


image_index.to_csv(image_index_file)

In [18]:
#image_index.sort_values(['cloud_coverage']).head(300)

In [19]:
image_index.tile.unique().tolist()

['T14SMD']

Select 3 best images (need to loop this over tiles)

In [20]:
def convert_hdf_to_cog(scene_id, product = "S30"):
    
    """
    This function receives the scene_id of an HLS scene (in a format similar to "HLS.S30.T14RNS.2020005.v1.4"
    and converts the scene from HDF format to COG. 
    
    Assumptions:
    - The corresponding HDF file for the scene is located at `/data/hdf/scene_id.hdf`
    - The output will be written to `/data/tif/scene_id/*.tif` and contains all the bands. 
    
    Inputs:
    - scene_id: The scene ID of the HLS scene
    - product: the HLS product ID. Default is S30, but it can be S30, L30, S30_ANGLES, L30_ANGLES
    
    """
    
    import os
    cmd = f"python3 /hls-hdf_to_cog/hls_hdf_to_cog/hls_hdf_to_cog.py --product {product} /data/hdf/{scene_id}.hdf --output-dir /data/tif/{scene_id}/"
    os.system(cmd)
    image_folder = '/data/tif/' + scene_id + '/'
    tif_count = len(glob(image_folder + '*.tif'))
    if(tif_count == 14):
        return(True)
    else:
        shutil.rmtree(image_folder)
        return(False)


convert selected hdf to cog

In [21]:
def convert_first_date(cand_images):
    """
    Converts first date image from data frame. 
    If conversion fails, the image is removed and the next "first" image is tried.
    Returns the converted image row, the image id, and the data frame with any failed images removed.
    """
    process_first = False
    while process_first == False:
        first_image = cand_images.head(1)
        first_image_id = (first_image.image_id.tolist())[0]
        print(first_image_id)
        process_first = convert_hdf_to_cog(first_image_id)
        print(process_first)
        if(process_first == False):
            cand_images = cand_images[cand_images.image_id != first_image_id]
    return(first_image, first_image_id, cand_images)

def convert_last_date(cand_images):
    """
    Converts last date image from data frame. 
    If conversion fails, the image is removed and the next "last" image is tried.
    Returns the converted image row, the image id, and the data frame with any failed images removed.
    """
    process_last = False
    while process_last == False:
        last_image = cand_images.tail(1)
        last_image_id = (last_image.image_id.tolist())[0]
        print(last_image_id)
        process_last = convert_hdf_to_cog(last_image_id)
        print(process_last)
        if(process_last == False):
            cand_images = cand_images[cand_images.image_id != last_image_id]
    return(last_image, last_image_id, cand_images)

def convert_middle_date(cand_images):
    """
    Converts middle date image from data frame. 
    If conversion fails, the image is removed and the next "middle" image is tried.
    Returns the converted image row, the image id, and the data frame with any failed images removed.
    """
    process_middle = False
    cand_image_count = len(cand_images)
    while process_middle == False:
        middle_image = cand_images.head(cand_image_count // 2).tail(1)
        middle_image_id = (middle_image.image_id.tolist())[0]
        print(middle_image_id)
        process_middle = convert_hdf_to_cog(middle_image_id)
        print(process_middle)
        if(process_middle == False):
            cand_images = cand_images[cand_images.image_id != middle_image_id]
    return(middle_image, middle_image_id, cand_images)




In [22]:
tiles_to_process = image_index.tile.unique().tolist()
image_index['converted'] = False

for tile in tiles_to_process:
    print(tile)
    cand_images = image_index[(image_index.tile == tile) &(image_index.spatial_coverage == 100) & (image_index.cloud_coverage <= cloud_thres)]
    print(cand_images)
    first_image, first_image_id, cand_images = convert_first_date(cand_images)
    last_image, last_image_id, last_images = convert_last_date(cand_images)
    middle_image, middle_image_id, middle_images = convert_middle_date(cand_images)

    selected_images = pd.concat([first_image, middle_image, last_image], ignore_index = True)
    print(selected_images)
    
    image_index.loc[image_index.image_id == first_image_id  , 'converted'] = True
    image_index.loc[image_index.image_id == middle_image_id  , 'converted'] = True
    image_index.loc[image_index.image_id == last_image_id  , 'converted'] = True


    assert len(selected_images) == 3
    assert len(selected_images.image_id.unique()) == 3

T14SMD
                       image_id    tile        date month cloud_coverage  \
8   HLS.S30.T14SMD.2021082.v1.4  T14SMD  2021-03-23     3              2   
10  HLS.S30.T14SMD.2021087.v1.4  T14SMD  2021-03-28     3              4   
14  HLS.S30.T14SMD.2021097.v1.4  T14SMD  2021-04-07     4              0   
50  HLS.S30.T14SMD.2021192.v1.4  T14SMD  2021-07-11     7              4   
57  HLS.S30.T14SMD.2021212.v1.4  T14SMD  2021-07-31     7              1   
61  HLS.S30.T14SMD.2021222.v1.4  T14SMD  2021-08-10     8              4   
67  HLS.S30.T14SMD.2021237.v1.4  T14SMD  2021-08-25     8              1   
75  HLS.S30.T14SMD.2021262.v1.4  T14SMD  2021-09-19     9              1   

   spatial_coverage  converted  
8               100      False  
10              100      False  
14              100      False  
50              100      False  
57              100      False  
61              100      False  
67              100      False  
75              100      False  
HLS.S30.T14

In [23]:
image_index[image_index.converted == True].head(200)

Unnamed: 0,image_id,tile,date,month,cloud_coverage,spatial_coverage,converted
8,HLS.S30.T14SMD.2021082.v1.4,T14SMD,2021-03-23,3,2,100,True
50,HLS.S30.T14SMD.2021192.v1.4,T14SMD,2021-07-11,7,4,100,True
75,HLS.S30.T14SMD.2021262.v1.4,T14SMD,2021-09-19,9,1,100,True


In [24]:
first_image_id

'HLS.S30.T14SMD.2021082.v1.4'

reproject selected cog to cdl crs

In [25]:
def reproject_hls_to_cdl(scene_folder,
                         bands = ["B02", "B03", "B04", "B05", "QA"],
                         cdl_file = "/data/2022_30m_cdls_clipped.tif"):
    
    """
    This function receives the path to a folder that contains all GeoTIFF files (for various bands)
    of a HLS scene, and reprojects those to the target CDL CRS and grid. 
    
    Assumptions:
    - scene_folder has a file structure like: ".../<scene_id>/<scene_id>.<band_id>.tiff
    - scene_folder should not have a "/" at the end
    
    Inputs:
    - scene_folder: is the path to the folder that contains HLS GeoTIFF files for all bands of HLS
    - bands: list of bands of HLS that should be reprojected (default is all bands)
    - cdl_file: contains the path to the clipped CDL GeoTIFF file
    
    """
    
    for band in bands:
        xds = xarray.open_rasterio(f"{scene_folder}/{scene_folder.split('/')[-1]}.{band}.tif")
        cdl = xarray.open_rasterio(cdl_file)
        xds_new = xds.rio.reproject_match(cdl, resampling = Resampling.bilinear)
        xds_new.rio.to_raster(raster_path = f"{scene_folder}/{scene_folder.split('/')[-1]}.{band}.5070.tif")

In [26]:
selected_images = (image_index[image_index.converted == True]).reset_index(drop = True)
print(selected_images)

                      image_id    tile        date month cloud_coverage  \
0  HLS.S30.T14SMD.2021082.v1.4  T14SMD  2021-03-23     3              2   
1  HLS.S30.T14SMD.2021192.v1.4  T14SMD  2021-07-11     7              4   
2  HLS.S30.T14SMD.2021262.v1.4  T14SMD  2021-09-19     9              1   

  spatial_coverage  converted  
0              100       True  
1              100       True  
2              100       True  


In [27]:
## reproject to cdl
for k in range(len(selected_images)):
    image_id = selected_images.at[k, 'image_id']
    print(image_id)
    reproject_hls_to_cdl("/data/tif/" + image_id)

HLS.S30.T14SMD.2021082.v1.4
HLS.S30.T14SMD.2021192.v1.4
HLS.S30.T14SMD.2021262.v1.4


chipping

In [28]:
chip_df['tile_t'] = "T" + chip_df.tile
tiles_to_chip = selected_images.tile.unique()
print(tiles_to_chip)
chips_to_process = chip_df[chip_df.tile_t.isin(tiles_to_process)].reset_index(drop = True)
chips_to_process

['T14SMD']


Unnamed: 0,chip_id,chip_x,chip_y,tile,tile_t
0,000_027,-99.078652,35.156487,14SMD,T14SMD
1,000_028,-99.076245,35.096464,14SMD,T14SMD
2,000_029,-99.073841,35.036438,14SMD,T14SMD
3,000_030,-99.071441,34.976409,14SMD,T14SMD
4,000_031,-99.069044,34.916375,14SMD,T14SMD
5,000_032,-99.066652,34.856338,14SMD,T14SMD
6,000_033,-99.064263,34.796297,14SMD,T14SMD
7,000_034,-99.061877,34.736253,14SMD,T14SMD
8,000_035,-99.059496,34.676204,14SMD,T14SMD
9,000_036,-99.057118,34.616151,14SMD,T14SMD


In [29]:
def check_qa(qa_path, shape,  valid_qa = [  0,   4,  16,  20,  32,  36,  48,  52,  64,  68,  80,  84,  96,
                            100, 112, 116, 128, 132, 144, 148, 160, 164, 176, 180, 192, 196,
                            208, 212, 224, 228, 240, 244]):
    
    """
    This function receives a path to a qa file, and a geometry. It clips the QA file to the geometry. 
    It returns the number of valid QA pixels in the geometry, and the clipped values.
    
    Assumptions: The valid_qa values are taken from Ben Mack's post:
    https://benmack.github.io/nasa_hls/build/html/tutorials/Working_with_HLS_datasets_and_nasa_hls.html
    
    Inputs:
    - qa_path: full path to reprojected QA tif file
    - shape: 'geometry' property of single polygon feature read by fiona
    - valid_qa: list of integer values that are 'valid' for QA band.
    

    
    """
    with rasterio.open(qa_path) as src:
        out_image, out_transform = rasterio.mask.mask(src, shape, crop=True)
        print(out_image.shape)
        vals = out_image.flatten()
     #   print(vals)
        ncell = len(vals)
      #  print(ncell)
        valid_count = sum(x in valid_qa for x in vals)
      #  print(valid_count)
        return(valid_count, out_image[0])
       # print(np.count_nonzero(out_image in valid_qa))
     #   print(out_image.flatten())
      #  out_meta = src.meta
     #   out_bands.append(out_image[0])
    

In [30]:
def process_chip(chip_id, 
                 chip_tile,
                 shape,
                 bands = ["B02", "B03", "B04", "B05"]):
    
    """
    This function receives a chip id, HLS tile, chip geometry, and a list of bands to process. 
    
    Assumptions:
    
    Inputs:
    - chip_id: string of chip id, e.g. '000_001'
    - chip_tile: string of HLS tile starting with 'T', e.g. 'T15ABC'
    - shape: 'geometry' property of single polygon feature read by fiona
    
    The function writes out a multi-date TIF containing the bands for each of the three image dates for an HLS tile. 
    The function writes out a multi-date TIF containing the QA bands of each date.
    The function writes out a chipped version of CDL. 
    The function calls check_qa(), which makes assumptions about what QA pixels are valid.
    The function returns the number of valid QA pixels at each date, as a tuple.
    
    """
    ## get reprojected image paths
    selected_image_folders = sorted(glob(f'/data/tif/*{chip_tile}*'))
   # print(selected_image_folders)
    
    assert len(selected_image_folders) == 3
    
    first_date_images = sorted(glob(selected_image_folders[0] + '/*.5070.tif')) 
    first_date_qa = [x for x in first_date_images if '.QA.' in x][0]
    first_date_images.remove(first_date_qa)
    
    second_date_images = sorted(glob(selected_image_folders[1] + '/*.5070.tif'))
    second_date_qa = [x for x in second_date_images if '.QA.' in x][0]
    second_date_images.remove(second_date_qa)
    
    third_date_images = sorted(glob(selected_image_folders[2] + '/*.5070.tif'))
    third_date_qa = [x for x in third_date_images if '.QA.' in x][0]
    third_date_images.remove(third_date_qa)
    all_date_images = first_date_images + second_date_images + third_date_images
    
 #   print(all_date_images)
  #  print(len(all_date_images))
    
    valid_first, qa_first = check_qa(first_date_qa, shape)
    valid_second, qa_second = check_qa(second_date_qa, shape)
    valid_third, qa_third = check_qa(third_date_qa, shape)
    qa_bands = []
   # print('qa')
   # print(qa_first)
   # print(qa_first.shape)
    qa_bands.append(qa_first)
    qa_bands.append(qa_second)
    qa_bands.append(qa_third)
    qa_bands = np.array(qa_bands).astype(np.int16)
  #  print(qa_bands.shape)
   # print(first_date_qa)
    assert len(all_date_images) == 3 * len(bands)
    
    out_bands = []
    
    for img in all_date_images:
        with rasterio.open(img) as src:
            out_image, out_transform = rasterio.mask.mask(src, shape, crop=True)
            out_meta = src.meta
            out_bands.append(out_image[0])
    
    out_bands = np.array(out_bands)
    print(out_bands.shape)
    print(out_image.shape)
    ## band order?   
    out_meta.update({"driver": "GTiff",
                     "height": out_bands.shape[1],
                     "width": out_bands.shape[2],
                     "count": out_bands.shape[0],
                     "transform": out_transform})
   # print(out_meta)
    
    with rasterio.open(chip_output_dir + "chip_hls_id_" + str(chip_id) + ".tif", "w", **out_meta) as dest:
        dest.write(out_bands)
      
    ## update for QA
   # print(qa_bands.shape)
    out_meta.update({"driver": "GTiff",
                     "height": qa_bands.shape[1],
                     "width": qa_bands.shape[2],
                     "count": qa_bands.shape[0],
                     "transform": out_transform})
    
    with rasterio.open(chip_output_dir + "chip_hls_id_" + str(chip_id) + "_QA.tif", "w", **out_meta) as dest:
        dest.write(qa_bands)  
    
        
    ## clip cdl to chip
  
    with rasterio.open("/data/2022_30m_cdls_clipped.tif") as src:
        out_image, out_transform = rasterio.mask.mask(src, shape, crop=True)
        out_meta = src.meta
     #   colormap = src.colormap(1)

    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform})

    with rasterio.open(chip_cdl_dir + "chip_cdl_id_" + str(chip_id) + ".tif", "w", **out_meta) as dest:
        dest.write(out_image)
        
    return(valid_first,
           valid_second,
           valid_third)
    

In [None]:
## process chips


for k in range(len(chips_to_process)):
    current_id = chips_to_process.chip_id[k]
    chip_tile = chips_to_process.tile_t[k]
    print(current_id)
    print(chip_tile)
    chip_index = chip_ids.index(current_id)
   # print(chip_index)
    
    chip_feature = chips['features'][chip_index]
   # print(chip_feature)
    
    shape = [chip_feature['geometry']]
    
    ## do we want to scale/clip reflectances?
    
    valid_first,  valid_second, valid_third = process_chip(current_id, chip_tile, shape)
    
    chip_df_index = chip_df.index[chip_df['chip_id'] == current_id].tolist()[0]
    print(chip_df_index)
    chip_df.at[chip_df_index, 'valid_first'] = valid_first
    chip_df.at[chip_df_index, 'valid_second'] = valid_second
    chip_df.at[chip_df_index, 'valid_third'] = valid_third
    
    


In [47]:
ct = datetime.datetime.now()
chip_df.to_csv(chip_csv + str(ct) + '.csv')
#chip_df.write_csv(chip_csv)

'/data/chip_csv_2023-05-08 02:14:07.296916.csv'