In [3]:
# NAIP clip
import os
import numpy as np
import pandas as pd
import rasterio
import sys
import geopandas as gpd
from pathlib import Path
from tqdm import tqdm
from rasterio.mask import mask, raster_geometry_mask
from shapely.geometry import box
from rasterio.enums import Resampling
from itertools import product
from rasterio import windows
import rioxarray
import shapely
import shutil

# Absolute path of the directory the notebook is executed from. It is added to
# the module search path so that modules located in that directory can be imported.
path_cur = os.path.abspath(os.curdir)
sys.path.append(path_cur)

# `up(p)` returns the parent directory of `p`; nesting the call climbs one
# directory level per call.
from os.path import dirname as up

In [4]:
# Display the directory three levels above the working directory; the NAIP
# imagery and tile-index paths configured further down are built from it.
up(up(up(path_cur)))

'/rapids/notebooks/sciclone/geograd/Miranda'

In [9]:
# Input locations used by the rest of the notebook.

# Directory three levels above the working directory; the NAIP imagery and its
# tile index are both located relative to it.
_data_root = up(up(up(path_cur)))

# Folder holding the NAIP GeoTIFF tiles that get cut into patches.
base_path = Path(_data_root, 'VIMS', 'NAIP', 'VA_NAIP_2018_8977', '2018_VA_wgs84')
# (base_path / 'image_patches_256').mkdir(exist_ok=True, parents=True)
# (base_path / 'temp_patches').mkdir(exist_ok=True, parents=True)

# Path of the label GeoJSON *file* (despite the "_DIR" suffix in the name).
Label_DATA_DIR = os.path.join(
    up(path_cur), 'data', 'processing_data', 'marsh_all_500.geojson'
)

# Path of the shapefile indexing the NAIP tiles.
overlap_index_tile = os.path.join(
    _data_root,
    'VIMS/NAIP/VA_NAIP_2018_8977/tileindex_VA_NAIP_2018/2018_VA_tiles.shp',
)

In [None]:
# Read the NAIP tile-index shapefile and keep its 'location' column as a plain list.
# NOTE(review): 'location' presumably holds tile file names of the form '<name>.tif'
# (the `alltiles` filter further down compares '<name>.tif' strings against this
# list) -- confirm against the shapefile.
# This cell must be executed before the `alltiles` cell, which reads `overlap_tiles`.
tile_gdf = gpd.read_file(overlap_index_tile)
overlap_tiles = tile_gdf['location'].to_list()

In [10]:
# Load the label geometries from the GeoJSON file (Label_DATA_DIR is a file path,
# not a directory). They are used by the patch-selection cell to decide which
# image patches to keep.
label_df = gpd.read_file(Label_DATA_DIR)

In [11]:
# File names (not full paths) of the NAIP tiles found in base_path.
# os.listdir returns entries in arbitrary order, so the listing is sorted to make
# the processing order, and the "Image N" numbers printed while tiling, reproducible.
naip_paths = sorted(f for f in os.listdir(base_path) if f.endswith('tif'))

In [12]:
def cropping_bands(ref_img_path, ups_img, outfile):

    """
    Crop an opened raster to the bounding box of a reference raster and save it.

    ref_img_path: path of the reference band; its bounds define the crop extent
                  and its metadata is the template for the output file
    ups_img: band to crop, as an already opened dataset (rasterio.open() output);
             it is left open for the caller
    outfile: path of the output band, holding the pixels of ups_img that fall
             inside the bounds of the reference band

    NOTE(review): the bounds are used as-is, so both rasters are assumed to share
    the same CRS -- confirm with the callers.
    """

    # Only the bounds and the metadata of the reference are needed, so the file
    # is closed as soon as they have been read.
    with rasterio.open(ref_img_path) as ref_img:
        # get the geometry of the reference band
        geom = box(*ref_img.bounds)
        meta = ref_img.meta.copy()

    # filled=False returns a masked array instead of overwriting masked pixels.
    cropped, crop_transf = mask(ups_img, [geom], crop=True, filled=False, all_touched=False)

    c, h, w = cropped.shape

    # Describe the array that is actually written: band count and size come from
    # the cropped data, the transform from the crop.
    meta['count'] = c
    meta['width'], meta['height'] = w, h
    meta['transform'] = crop_transf

    with rasterio.open(outfile, 'w', **meta) as dst:
        dst.write(cropped)


def upsample(img_lres_path, img_hres_path, img_size, outf, method=Resampling.bilinear):

    """
    Resample a low resolution band to a square grid and save it with the
    metadata of a high resolution band.

    img_lres_path: low resolution cropped band path
    img_hres_path: high resolution cropped band path (only its metadata is used)
    img_size: number of rows and columns of the resampled band
    outf: output path of the resampled band
    method: rasterio Resampling method applied while reading (bilinear by default)

    NOTE(review): the output metadata is copied unchanged from the high
    resolution band, so that band is presumably expected to be
    img_size x img_size with the same band count as the low resolution one --
    confirm with the callers.
    """

    # Resampling happens while reading: the requested out_shape differs from
    # the shape stored on disk.
    with rasterio.open(img_lres_path) as dataset:
        data = dataset.read(
            out_shape=(
                dataset.count,
                int(img_size),
                int(img_size)
            ),
            resampling=method
        )

    # The high resolution band is opened only to borrow its metadata.
    with rasterio.open(img_hres_path) as dataset_hres:
        meta = dataset_hres.meta.copy()

    with rasterio.open(outf, 'w', **meta) as dst:
        dst.write(data)


def get_tile_geom(tile_tif, crs=None):

    """
    Return the bounding box of a raster as a shapely polygon.

    tile_tif: path of the raster file
    crs: optional CRS given as a string; when set, the raster is reprojected to
         it and the bounds of the reprojected raster are returned, otherwise
         the bounds are expressed in the raster's own CRS

    Returns a shapely box built from (minx, miny, maxx, maxy).
    """

    if crs is not None:
        assert isinstance(crs, str), "crs must be given as a string"

    # The context manager closes the file handle; this function is typically
    # called once per patch file, so unclosed handles would pile up.
    with rioxarray.open_rasterio(tile_tif) as rds:
        source = rds if crs is None else rds.rio.reproject(crs)
        minx, miny, maxx, maxy = source.rio.bounds()

    return shapely.geometry.box(minx, miny, maxx, maxy, ccw=True)

def get_tiles(ds, width=256, height=256):
    """
    Generate the grid of patch windows covering an opened raster.

    ds: opened raster dataset; its meta 'width'/'height' and its transform are read
    width, height: patch size in pixels

    Yields (window, transform) pairs, column offset varying slowest. Each window
    is intersected with the full raster extent, so windows on the right and
    bottom edges are clipped to the raster; the transform is the one of the
    (clipped) window.
    """
    n_cols = ds.meta['width']
    n_rows = ds.meta['height']
    full_extent = windows.Window(col_off=0, row_off=0, width=n_cols, height=n_rows)

    for col_off in range(0, n_cols, width):
        for row_off in range(0, n_rows, height):
            patch = windows.Window(col_off=col_off, row_off=row_off, width=width, height=height)
            clipped = patch.intersection(full_extent)
            yield clipped, windows.transform(clipped, ds.transform)

In [13]:
# for tile in tqdm(naip_paths):
    
#     tile_name = tile.split('.')[0]
#     temp_path = os.path.join(temp_patch_dir, tile_name)
    
#     count = len(os.listdir(temp_path))
    
#     print(tile_name)
#     print(count)
    

In [14]:

# Edge length, in pixels, of the square patches cut from every NAIP tile.
N = 256
# Running count of the tiles visited; only used in the progress messages.
num = 0

# Switch for this cell: set to True to cut the NAIP tiles into patches.
# With False the cell only defines the names above and temp_patch_dir.
cropping = False

# Patches are written to one sub-directory per source tile under this directory.
temp_patch_dir = os.path.join(up(path_cur), 'data', 'NAIP_data_processing', 'temp_patch')

if cropping:

    for tile in tqdm(naip_paths):

        wgs84_tiff = Path(base_path) / tile

        print("------------------------------------------------------")

        print("Processing tile {}: ".format(wgs84_tiff))

        num += 1
        print("Image {}".format(str(num)))

        tile_name = tile.split('.')[0]
        tile_dir = os.path.join(temp_patch_dir, tile_name)

        # A tile whose output directory already exists is skipped. Checking the
        # path directly avoids re-listing temp_patch_dir for every tile and
        # also works when temp_patch_dir has not been created yet.
        # NOTE: the directory also exists after an interrupted run, so a
        # partially processed tile must be deleted by hand to be regenerated.
        if os.path.exists(tile_dir):
            continue

        Path(tile_dir).mkdir(exist_ok=True, parents=True)

        # The two placeholders receive the column and row offset of the patch.
        output_filename = tile_name + '_tile_{}-{}.tif'

        with rasterio.open(wgs84_tiff) as inds:

            meta = inds.meta.copy()

            for window, transform in get_tiles(inds, N, N):

                # Each patch keeps the tile metadata except for its own
                # georeferencing and size.
                meta['transform'] = transform
                meta['width'], meta['height'] = window.width, window.height

                outpath = os.path.join(tile_dir, output_filename.format(int(window.col_off), int(window.row_off)))

                with rasterio.open(outpath, 'w', **meta) as outds:
                    outds.write(inds.read(window=window))

In [30]:
# Entries of temp_patch_dir that belong to a tile listed in the tile index: the
# entry name without its last '_'-separated token, plus '.tif', must appear in
# overlap_tiles (defined in the tile-index cell, which has to be run first).
# The index is turned into a set once so each membership test is O(1) instead
# of a scan of the whole list.
overlap_tile_names = set(overlap_tiles)
alltiles = [file for file in os.listdir(temp_patch_dir) if file.rpartition('_')[0] + '.tif' in overlap_tile_names]

In [None]:
# Switch for this cell: set to True to copy the patches that contain labels.
select_image_patch = False
# Destination folder of the selected patches.
image_patch_path = os.path.join(up(path_cur), 'data', 'NAIP_data_processing', 'image_patch')

if select_image_patch:

    # shutil.copyfile does not create missing directories, so make sure the
    # destination exists before the first copy.
    Path(image_patch_path).mkdir(exist_ok=True, parents=True)

    for tile in alltiles:

        tile_dir = os.path.join(temp_patch_dir, tile)
        onlyfiles = [f for f in os.listdir(tile_dir) if f.endswith('tif')]

        for file in onlyfiles:

            filepath = os.path.join(tile_dir, file)
            # Bounding box of the patch, in the patch's own CRS.
            # NOTE(review): label_df is assumed to use the same CRS -- confirm.
            patch_geom = get_tile_geom(filepath)

            # A patch is kept when at least one label geometry lies entirely
            # within its bounding box.
            # NOTE(review): the original comment spoke of patches that
            # *intersect* the CUSP data, whereas `within` is stricter than
            # `intersects` -- confirm which one is intended.
            if label_df.within(patch_geom).any():
                print(file)
                # Copy the selected patch to a separate folder; the imagery in
                # that folder is used to create training/validation data.
                patch_path = os.path.join(image_patch_path, file)
                shutil.copyfile(filepath, patch_path)



m_3607612_ne_18_060_20180830_wgs84_tile_2816-6144.tif
m_3607612_ne_18_060_20180830_wgs84_tile_6144-1280.tif
m_3607612_ne_18_060_20180830_wgs84_tile_6912-5888.tif
m_3607501_sw_18_060_20180827_wgs84_tile_256-11264.tif
m_3607501_sw_18_060_20180827_wgs84_tile_768-11008.tif
m_3607501_sw_18_060_20180827_wgs84_tile_1024-9728.tif
m_3607509_nw_18_060_20180827_wgs84_tile_0-3584.tif
m_3607509_nw_18_060_20180827_wgs84_tile_256-768.tif
m_3607509_nw_18_060_20180827_wgs84_tile_256-4352.tif
m_3607509_nw_18_060_20180827_wgs84_tile_768-2560.tif
m_3607509_nw_18_060_20180827_wgs84_tile_768-2816.tif
m_3607509_nw_18_060_20180827_wgs84_tile_1280-1536.tif
m_3607509_nw_18_060_20180827_wgs84_tile_1536-2048.tif
m_3607509_nw_18_060_20180827_wgs84_tile_2560-3072.tif
m_3607509_nw_18_060_20180827_wgs84_tile_2816-3072.tif
m_3607509_nw_18_060_20180827_wgs84_tile_3328-8192.tif
m_3607509_nw_18_060_20180827_wgs84_tile_3840-8448.tif
m_3607509_nw_18_060_20180827_wgs84_tile_5376-8448.tif
m_3607509_sw_18_060_20180827_wgs84_t