In [1]:
import rasterio as rio
from rasterio.mask import mask
import cv2
import fiona
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
import os
import subprocess
import shutil
from scipy.interpolate import interp2d
import glob 
import pandas as pd
from shapely.geometry import mapping
import geopandas as gpd

In [43]:
import sys

# Command-line interface: <script> <basepath> <glacier_outline>
#   basepath        - folder that is listed and globbed for PS_<date>*.tif tiles
#   glacier_outline - vector file opened with geopandas to get the glacier polygon
# NOTE(review): inside a live notebook kernel sys.argv presumably holds the
# kernel's own launch arguments rather than these two paths; set the two
# variables by hand when running the cells interactively.
args = sys.argv
if len(args) < 3:
    # Stop with a usage hint instead of an unexplained IndexError.
    sys.exit('usage: <script> <basepath> <glacier_outline>')
basepath = args[1]
glacier_outline = args[2]

In [33]:
# Example values for running the cells interactively (uncomment to override the command-line arguments):
# basepath = '/Volumes/SURGE_DISK/PS_downloads_VG/2021-06/PSScene/standard_grid/stitched_by_sat' # path to grid standardized images
# glacier_outline = '/Users/jukesliu/Documents/TURNER/DATA/shapefiles_gis/VG/Variegated_polygon_UTM07.shp'

In [37]:
# read in glacier shapefile
# glacier_outline is the path handed to geopandas; the geometries are pulled out
# of the resulting GeoDataFrame so they can be indexed directly. Only the first
# geometry (glacier[0]) is used by the tile-coverage masking further down.
# NOTE(review): the masking presumably requires the outline to share the
# rasters' CRS - nothing here reprojects it; TODO confirm.
glacier_gdf = gpd.read_file(glacier_outline)
glacier = glacier_gdf.geometry.values

In [19]:
# grab all the unique dates
# Tile names look like PS_<date>_<satID>.tif, so the date is the second
# '_'-separated field. Only entries matching the PS_*.tif pattern that the tile
# glob uses later are considered: anything else in the folder (hidden files
# such as .DS_Store, sidecar files, sub-folders) would either raise an
# IndexError on the split or contribute a bogus "date".
dates = []
for file in os.listdir(basepath): # path
    if not (file.startswith('PS_') and file.endswith('.tif')):
        continue
    date = file.split('_')[1] # grab the date from the file name
    dates.append(date)
unique_dates = sorted(set(dates)) # de-duplicate and sort the dates
print(unique_dates)

['20210601', '20210604', '20210606', '20210607', '20210608', '20210609', '20210610', '20210612', '20210614', '20210617', '20210619', '20210621', '20210622', '20210624', '20210627', '20210628', '20210629']


In [42]:
# For every date, keep a single tile: the one covering most of the glacier
# outline when several satellites imaged it, or the only tile otherwise. The
# chosen file is copied to <basepath>/largest_satchunk/ under the first 11
# characters of its name (PS_<date>), which drops the satellite ID.

# os.path.join puts the folder inside basepath whether or not basepath ends
# with a path separator (plain string concatenation only did so with one).
out_dir = os.path.join(basepath, 'largest_satchunk')
if not os.path.exists(out_dir):
    print('folder made')
    os.mkdir(out_dir)

for date in unique_dates:
    print('TILES FOR', date)
    tiles = glob.glob(os.path.join(basepath, 'PS_'+date+'*.tif')) # grab all file paths for that date
    print(tiles)

    if len(tiles) == 0:
        continue # nothing to copy for this date

    if len(tiles) > 1:
        coverage_compare = [] # find the tile with the greatest coverage over the glacier outline
        for tile in tiles:
            # the context manager closes the dataset even if masking fails
            with rio.open(tile) as reader:
                # crop image to glacier outline to determine coverage
                try:
                    out_image, out_transform = mask(reader, [mapping(glacier[0])], crop=True)
                    crop_array = out_image[0,:,:] # first band only
                    coverage = np.count_nonzero(crop_array[~np.isnan(crop_array)]) # calculate pixels of coverage
                except Exception as err:
                    # keep going, but say why the tile is excluded
                    print('could not compute coverage for', tile, '-', err)
                    coverage = np.nan # np.NaN was removed in NumPy 2.0
            coverage_compare.append(coverage)

        # np.nanargmax raises on an all-NaN list, so handle that case explicitly
        if np.all(np.isnan(coverage_compare)):
            print('no tile with measurable glacier coverage for', date, '- skipping')
            continue

        # determine the file with the greatest glacier coverage
        ref_img_idx = np.nanargmax(coverage_compare)
        reffile = tiles[ref_img_idx]
        print(reffile)
    else: # if there is only one, copy it over and remove sat ID
        reffile = tiles[0]

    # copy over that image to new folder
    shutil.copyfile(reffile, os.path.join(out_dir, os.path.basename(reffile)[:11]+'.tif'))
        

TILES FOR 20210601
['/Volumes/SURGE_DISK/PS_downloads_VG/2021-06/PSScene/standard_grid/stitched_by_sat/PS_20210601_2448.tif']
TILES FOR 20210604
['/Volumes/SURGE_DISK/PS_downloads_VG/2021-06/PSScene/standard_grid/stitched_by_sat/PS_20210604_240f.tif']
TILES FOR 20210606
['/Volumes/SURGE_DISK/PS_downloads_VG/2021-06/PSScene/standard_grid/stitched_by_sat/PS_20210606_105c.tif']
TILES FOR 20210607
['/Volumes/SURGE_DISK/PS_downloads_VG/2021-06/PSScene/standard_grid/stitched_by_sat/PS_20210607_2407.tif']
TILES FOR 20210608
['/Volumes/SURGE_DISK/PS_downloads_VG/2021-06/PSScene/standard_grid/stitched_by_sat/PS_20210608_1001.tif', '/Volumes/SURGE_DISK/PS_downloads_VG/2021-06/PSScene/standard_grid/stitched_by_sat/PS_20210608_1063.tif']
/Volumes/SURGE_DISK/PS_downloads_VG/2021-06/PSScene/standard_grid/stitched_by_sat/PS_20210608_1001.tif
TILES FOR 20210609
['/Volumes/SURGE_DISK/PS_downloads_VG/2021-06/PSScene/standard_grid/stitched_by_sat/PS_20210609_1034.tif', '/Volumes/SURGE_DISK/PS_downloads_V