In [27]:
import os
import numpy as np
import pandas as pd
import rasterio
import sys
import geopandas as gpd
from pathlib import Path
from tqdm import tqdm
from rasterio.mask import mask, raster_geometry_mask
from shapely.geometry import box
from rasterio.enums import Resampling

# Absolute path of the current working directory; appended to sys.path so that
# modules located there can be imported.
path_cur = os.path.abspath('.')
sys.path.append(path_cur)

from os.path import dirname as up

In [28]:
# Working directory for intermediate products, and locations of the input data.
# All paths are resolved relative to the parent of the current directory.

base_path = Path(up(path_cur)) / 'data' / 'processing_data'
os.makedirs(base_path / 'image_patches_256', exist_ok=True)

IMG_DATA_DIR = os.path.join(up(path_cur), 'data', 'sentinel_shoreline_2017')
Label_DATA_DIR = os.path.join(base_path, 'marsh_all_500.geojson')

In [None]:
# Load the label file (GeoJSON) with geopandas; later cells read the
# 'unique_id' and 'geometry' columns of this frame.
label_df = gpd.read_file(Label_DATA_DIR)

KeyboardInterrupt: 

In [None]:
# Show which columns the label frame provides
label_df.columns

In [None]:
# Full paths of every entry in the image folder whose name ends with 'tif'
all_bands = [
    os.path.join(IMG_DATA_DIR, fname)
    for fname in os.listdir(IMG_DATA_DIR)
    if fname.endswith('tif')
]

In [None]:
def cropping_bands(ref_img_path, ups_img, outfile):
    
    """
    Crop a raster to the bounding box of a reference image and save the result.

    ref_img_path: path of the reference (10m resolution) band
    ups_img: opened dataset of the band to crop (rasterio.open() output)
    outfile: output path of the cropped band, aligned with the extent of ref_img

    The output metadata is copied from the reference image; band count, width,
    height and transform are replaced by those of the cropped array.
    """

    # Only the bounds and the metadata of the reference are needed, so the
    # file handle is released as soon as both have been read.
    with rasterio.open(ref_img_path) as ref_img:
        # get the geometry of the reference high resolution band
        geom = box(*ref_img.bounds)
        meta = ref_img.meta

    cropped, crop_transf = mask(ups_img, [geom], crop=True, filled=False, all_touched=False)

    c, h, w = cropped.shape

    # The band count has to match the array being written, not the reference.
    meta['count'] = c
    meta['width'], meta['height'] = w, h
    meta['transform'] = crop_transf

    with rasterio.open(outfile, 'w', **meta) as dst:
        dst.write(cropped)


def upsample(img_lres_path, img_hres_path, img_size, outf, sample_method='bilinear'):
    
    """
    Resample a low resolution band to a square grid and save it with the
    metadata of the matching high resolution band.

    img_lres_path: low resolution cropped band path
    img_hres_path: high resolution cropped band path (source of the output metadata)
    img_size: number of rows and of columns of the resampled array
    outf: output path of the resampled band
    sample_method: name of a rasterio.enums.Resampling member (e.g. 'bilinear',
        'nearest') or a Resampling member itself

    Raises KeyError when sample_method is a string that is not a Resampling name.
    """

    # Look the method up by name: `Resampling.sample_method` would ask the enum
    # for a member literally called "sample_method" and raise AttributeError.
    if isinstance(sample_method, Resampling):
        resampling = sample_method
    else:
        resampling = Resampling[sample_method]

    with rasterio.open(img_lres_path) as dataset:
        # resample data to target shape
        data = dataset.read(
            out_shape=(
                dataset.count,
                int(img_size),
                int(img_size)
            ),
            resampling=resampling
        )

    with rasterio.open(img_hres_path) as dataset_hres:
        meta = dataset_hres.meta

    # The band count has to match the array being written.
    meta['count'] = data.shape[0]

    with rasterio.open(outf, 'w', **meta) as dst:
        dst.write(data)

In [1]:
# Using center point of tmi to clip image:
# for every 10 m band, write one N x N tile centred on the centroid of each label polygon.

image_clipping = False  # set to True to (re)generate the tiles
N = 256
bands10m = ['B02', 'B03', 'B04', 'B08'] # RGB,NIR

if image_clipping:

    for band in tqdm(all_bands):

        # the band name is the second '_'-separated token of the file name
        if os.path.basename(band).split('_')[1] not in bands10m:
            continue

        output_filename = os.path.basename(band).split('.')[0] + '_tile_{}.tif'

        # the context manager closes the band once all of its tiles are written
        with rasterio.open(band) as img:

            for polyid, poly in zip(label_df['unique_id'], label_df['geometry']):

                centroid = poly.centroid
                py, px = img.index(centroid.x, centroid.y)

                # Build an NxN window
                # NOTE(review): the window is read without boundless=True, so a centroid
                # closer than N/2 pixels to the raster edge presumably yields a clip
                # smaller than N x N - confirm that no label sits that close to the border.
                window = rasterio.windows.Window(px - N//2, py - N//2, N, N) # Window(col_off, row_off, width, height)

                # Read the data in the window
                # clip is a nbands * N * N numpy array
                clip = img.read(window=window)

                meta = img.meta
                meta['width'], meta['height'] = N, N
                meta['transform'] = rasterio.windows.transform(window, img.transform)

                outfile = os.path.join(base_path, 'image_patches_256', output_filename.format(polyid))

                with rasterio.open(outfile, 'w', **meta) as dst:
                    dst.write(clip)


In [19]:

coarse_clipping_resample = False  # set to True to crop and resample the non-10 m bands
upsample_size = 256

# The B02 tiles define the footprint every other band is cropped to.
all_hres = [f for f in os.listdir(os.path.join(base_path, 'image_patches_256')) if 'B02' in f]


if coarse_clipping_resample:

    # The resampled tiles go to their own folder, which must exist before
    # a file can be opened there for writing.
    os.makedirs(os.path.join(base_path, 'image_patches_resample_256'), exist_ok=True)

    for band in tqdm(all_bands):

        band_name = os.path.basename(band).split('_')[1]

        if band_name in bands10m:
            continue

        # entries of all_bands are already joined onto IMG_DATA_DIR
        with rasterio.open(band) as raw_img:

            for hres in all_hres:
                tile_name = hres.replace('B02', band_name)

                hres_path = os.path.join(base_path, 'image_patches_256', hres)
                crop_path = os.path.join(base_path, 'image_patches_256', tile_name)
                resample_path = os.path.join(base_path, 'image_patches_resample_256', tile_name)

                cropping_bands(hres_path, raw_img, crop_path)
                upsample(crop_path, hres_path, upsample_size, resample_path)

In [None]:
# Tile ids: last '_'-separated token of each B03 tile name, with the final
# four characters of the file name dropped.
all_img_id = [
    fname[:-4].rsplit('_', 1)[-1]
    for fname in os.listdir(os.path.join(base_path, 'image_patches_256'))
    if 'B03' in fname
]

In [None]:
bands4_list = ['B02', 'B03', 'B04', 'B08']
bands_other_list = ['B01', 'B05', 'B06', 'B07', 'B09', 'B10', 'B11', 'B12', 'B8A']

# Map every band name to the paths of its tiles, one path per tile id.
# The first band group is looked up in 'image_patches_256', the second one in
# 'image_patches_resample_256'; a missing tile raises ValueError.
band_dict = dict()

for band_group, folder in ((bands4_list, 'image_patches_256'),
                           (bands_other_list, 'image_patches_resample_256')):

    for band in band_group:

        tile_paths = band_dict.setdefault(band, list())

        for i in all_img_id:

            img_name = 'merge_{}_2017_tile_{}.tif'.format(band, i)
            img_path = os.path.join(base_path, folder, img_name)

            if not os.path.isfile(img_path):
                raise ValueError('The file {} is not existed'.format(img_name))

            tile_paths.append(img_path)

In [None]:
# Create multiband images

def img_to_array(image):
    
    """
    Read a raster file into memory.

    image: path of the raster to read
    returns: (img_array, meta) - the output of the dataset's read() and its
        metadata dictionary
    """

    # the context manager releases the file handle once the data is in memory
    with rasterio.open(image) as img:
        img_array = img.read()
        meta = img.meta

    return img_array, meta
    


In [23]:
# NLCD data: source raster plus the folders for the cropped and the resampled labels
nlcd_path = os.path.join(up(path_cur), 'data', 'NLCD_shoreline', 'clipped_nlcd.tif')
nlcd_label_dir = os.path.join(base_path, 'nlcd_label_coarse')
resample_nlcd_label_dir = os.path.join(base_path, 'nlcd_label_resampled')

In [26]:
nlcd_cropping = False  # set to True to crop the NLCD raster to every B02 tile

if nlcd_cropping:

    # the output folder must exist before a file can be opened there for writing
    os.makedirs(nlcd_label_dir, exist_ok=True)

    # the context manager closes the NLCD raster once every tile is processed
    with rasterio.open(nlcd_path) as nlcd_data:

        for hres in all_hres:

            # label file name = last '_'-separated token of the tile name + '.tif'
            nlcd_label_name = os.path.splitext(hres)[0].split('_')[-1] + '.tif'
            nlcd_label_out = os.path.join(nlcd_label_dir, nlcd_label_name)
            hres_path = os.path.join(base_path, 'image_patches_256', hres)

            cropping_bands(hres_path, nlcd_data, nlcd_label_out)
#             upsample(cropped, hres_path, N, nlcd_label_out, sample_method='nearest')

In [None]:
# sentinel_shoreline_2017_dir = '/rapids/notebooks/sciclone/geograd/Miranda/github/MarshMapping/raw_data/sentinel_shoreline/inventory/2017/uncompressed'

# all_safe_2017 = [os.path.join('/rapids/notebooks', sentinel_shoreline_2017_dir, f) for f in os.listdir(sentinel_shoreline_2017_dir) if f.endswith('SAFE')]


# # python s2_tiles_supres.py /rapids/notebooks/sciclone/geograd/Miranda/github/MarshMapping/raw_data/sentinel_shoreline/inventory/2017/uncompressed/S2A_MSIL1C_20170509T155911_N0205_R097_T18STH_20170509T160214.SAFE/MTD_MSIL1C.xml sciclone/geograd/Miranda/github/MarshMapping/data/sentinel_highres/S2A_MSIL1C_20170509T155911_N0205_R097_T18STH_20170509T160214.tif --run_60 --copy_original_bands
# # python s2_tiles_supres.py /rapids/notebooks/sciclone/geograd/Miranda/github/MarshMapping/raw_data/sentinel_shoreline/inventory/2017/uncompressed/S2A_MSIL1C_20170509T155911_N0205_R097_T18STH_20170509T160214.SAFE/MTD_MSIL1C.xml sciclone/geograd/Miranda/github/MarshMapping/data/processing_data/test.tif