In [173]:
# This script is used to pre-process the cropped border patches
import os
import numpy as np
import pandas as pd
import rasterio
import sys
import geopandas as gpd
from pathlib import Path
from tqdm import tqdm
from rasterio.mask import mask, raster_geometry_mask
from shapely.geometry import box
from rasterio.enums import Resampling
from itertools import product
from rasterio import windows
import rioxarray
import shapely
import shutil
from osgeo import gdal
from rasterio.mask import mask
from shapely.geometry import shape
from rasterio import features

# Absolute path of the current working directory; the relative data paths
# used further down are all built from it.
path_cur = os.path.abspath('.')
# Add the working directory to the module search path.
sys.path.append(path_cur)

from os.path import dirname as up

In [122]:
# Directory with the WGS84 NAIP tiles, located three directory levels above the working directory
base_path = Path(os.path.join(up(up(up(path_cur))), 'VIMS', 'NAIP', 'VA_NAIP_2018_8977', '2018_VA_wgs84'))
# Shapefile with the marsh polygons, read into a GeoDataFrame
file_path = os.path.join(up(path_cur), 'data/HL_NAIP/geocoded_HL/HL_marsh_tiles_wgs84.shp')
gdf = gpd.read_file(file_path)

# create identical ids (numbered before filtering, so ids follow the row order of the shapefile)
gdf['image_id'] = range(1, len(gdf)+1)
gdf['identical_id'] = gdf.apply(lambda x: x['creator'] + str(x['image_id']), axis=1)
# Keep only rows that have a URL; .copy() makes the filtered frame independent so the
# column assignment below does not trigger pandas' SettingWithCopyWarning
gdf = gdf[gdf['URL'].notna()].copy()
# NOTE: assigning_type is defined in a later cell; that cell has to be run before this one
gdf['marsh_value'] = gdf['Marsh_Type'].apply(assigning_type)

In [162]:
# Rows created by 'KN'; these polygons represent a list of clustered marshes
kngdf = gdf.loc[gdf['creator'] == 'KN']
# Distinct tile names referenced by those rows (set order is arbitrary)
kn_tile_list = list(set(kngdf['location'].to_list()))

In [163]:
# Rows not created by 'KN'; these polygons represent a list of spread-out polygons
nonekngdf = gdf.loc[gdf['creator'] != 'KN']
# Distinct tile names referenced by those rows (set order is arbitrary)
nonekn_tile_list = list(set(nonekngdf['location'].to_list()))

In [196]:
def assigning_type(type_val):
    """
    Map a marsh type label to its integer class value.

    type_val: marsh type string, compared case-insensitively
    returns: 2 for 'low', 1 for 'high'
    raises: ValueError for any other string
    """

    # Lower-case once instead of once per comparison
    marsh_type = type_val.lower()

    if marsh_type == 'low':
        return 2
    elif marsh_type == 'high':
        return 1
    else:
        # Raising a bare string is itself a TypeError ("exceptions must derive from
        # BaseException") and hides the message, so raise a real exception instead
        raise ValueError('The marsh type value is invalid!')


def cropping_bands(ref_img_path, ups_img, outfile):
    
    """
    Crop a band to the bounding box of a reference image and write the result to disk.

    ref_img_path: path of the reference band whose bounds define the crop
                  (described by the author as the 10m resolution band)
    ups_img: low resolution band as an already opened dataset (rasterio.open() output);
             it is not closed here
    outfile: output low resolution band with geom aligned with ref_img
    """

    # The context manager closes the reference dataset once its bounds and metadata are read
    with rasterio.open(ref_img_path) as ref_img:
        # get the geometry of the reference high resolution band
        geom = box(*ref_img.bounds)
        meta = ref_img.meta.copy()

    cropped, crop_transf = mask(ups_img, [geom], crop=True, filled=False, all_touched=False)

    c, h, w = cropped.shape

    # Everything else in the output profile is inherited from the reference image
    meta['width'], meta['height'] = w, h
    meta['transform'] = crop_transf
    # The written array comes from ups_img, so the band count has to follow it
    meta['count'] = c

    with rasterio.open(outfile, 'w', **meta) as dst:
        dst.write(cropped)


def upsample(img_lres_path, img_hres_path, img_size, outf, method=Resampling.bilinear):
    
    """
    Resample a low resolution band to img_size x img_size and write it with the
    metadata of a high resolution band.

    img_lres_path: low resolution cropped band path
    img_hres_path: high resolution cropped band path; its metadata is used unchanged
                   for the output, so it is presumably expected to be img_size x img_size
                   with the same band count - TODO confirm
    img_size: the size to resample (used for both height and width)
    outf: output resampled Bands
    method: rasterio resampling method applied while reading
    """

    # Context managers make sure both input datasets are closed again
    with rasterio.open(img_lres_path) as dataset:
        # resample data to target shape
        target_shape = (dataset.count, int(img_size), int(img_size))
        data = dataset.read(out_shape=target_shape, resampling=method)

    with rasterio.open(img_hres_path) as dataset_hres:
        meta = dataset_hres.meta.copy()

    with rasterio.open(outf, 'w', **meta) as dst:
        dst.write(data)


def get_tile_geom(tile_tif, crs=None):
    """
    Return the bounding box of a raster as a shapely polygon.

    tile_tif: path of the raster, opened with rioxarray
    crs: optional CRS string; when given, the raster is reprojected to it before
         the bounds are read
    returns: counter-clockwise shapely box built from (minx, miny, maxx, maxy)
    """

    rds = rioxarray.open_rasterio(tile_tif)

    try:
        if crs is not None:
            assert isinstance(crs, str)
            bounds_source = rds.rio.reproject(crs)
        else:
            bounds_source = rds

        minx, miny, maxx, maxy = bounds_source.rio.bounds()
    finally:
        # Release the underlying file; the dataset is only needed for its bounds
        rds.close()

    return shapely.geometry.box(minx, miny, maxx, maxy, ccw=True)

def get_tiles(ds, width=256, height=256):
    """
    Yield (window, transform) pairs that cover a raster with tiles.

    ds: opened dataset providing meta['width'], meta['height'] and .transform
    width, height: tile size in pixels; tiles on the far edges are clipped to the raster extent
    yields: the clipped window and the affine transform of that window,
            stepping through all row offsets for each column offset
    """
    n_cols, n_rows = ds.meta['width'], ds.meta['height']
    col_starts = range(0, n_cols, width)
    row_starts = range(0, n_rows, height)
    full_extent = windows.Window(col_off=0, row_off=0, width=n_cols, height=n_rows)

    for col_start in col_starts:
        for row_start in row_starts:
            tile = windows.Window(col_off=col_start, row_off=row_start, width=width, height=height)
            # Clip tiles that run past the raster extent
            clipped = tile.intersection(full_extent)
            yield clipped, windows.transform(clipped, ds.transform)

def crop_img(poly, img_path, out):
    """
    Mask a raster with one polygon and write the cropped result as a GeoTIFF.

    poly: record whose 'geometry' entry is the polygon to crop with
          (e.g. a GeoDataFrame row)
    img_path: path of the source raster
    out: path of the output GeoTIFF
    """

    # mask() expects a list of geometries
    geom = [poly['geometry']]

    with rasterio.open(img_path) as src:
        out_image, out_transform = mask(src, geom, crop=True)
        # Read the metadata while the dataset is still open rather than from a closed handle
        out_meta = src.meta.copy()

    # out_image has shape (bands, height, width)
    out_meta.update({"driver": "GTiff",
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform})

    with rasterio.open(out, "w", **out_meta) as dest:
        dest.write(out_image)

    print(f'Finish processing image {out}')

    

def crop_img_bounding(poly, img_path, out):
    """
    Crop a raster to the bounding box of a geometry and write the result as a GeoTIFF.

    poly: geometry (anything shapely's shape() accepts) whose bounding box defines the crop
    img_path: path of the source raster
    out: path of the output GeoTIFF
    """

    # get the bounding box of input segment
    minx, miny, maxx, maxy = shape(poly).bounds

    # Create a GeoJson like object that will be used as input for mask
    # (closed ring: the first corner is repeated at the end)
    pbox = [(minx, miny),
            (maxx, miny),
            (maxx, maxy),
            (minx, maxy),
            (minx, miny)]

    polyshp = [{"type": "Polygon", "coordinates": [pbox]}]

    with rasterio.open(img_path) as src:
        out_image, out_transform = mask(src, polyshp, crop=True)
        # Read the metadata while the dataset is still open rather than from a closed handle
        out_meta = src.meta.copy()

    # out_image has shape (bands, height, width)
    out_meta.update({"driver": "GTiff",
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform})

    with rasterio.open(out, "w", **out_meta) as dest:
        dest.write(out_image)

    print(f'Finish processing image {out}')

    



In [149]:

# Edge length of the square patches, in pixels
N = 256
# Counter of processed tiles, only used for the progress printout
num = 0

# Switch for this cell: set to True to (re)generate the patches
cropping = False

temp_patch_dir = os.path.join(up(path_cur), 'data/HL_NAIP/HL_images/temp_patches_256')
patch_dir = os.path.join(up(path_cur), 'data/HL_NAIP/HL_images/patches_256')

if cropping:

    # Reset the switch so the flag reads False once the run has started
    cropping = False

    # shutil.copyfile does not create missing directories, so make sure the target exists
    Path(patch_dir).mkdir(exist_ok=True, parents=True)

    for tile in tqdm(kn_tile_list):

        # '<stem>.<ext>' -> '<stem>_wgs84' / '<stem>_wgs84.tif'
        tile_name = tile.split('.')[0] + '_wgs84'
        wgs84_tiff = Path(base_path) / (tile_name + '.tif')

        print("------------------------------------------------------")

        print("Processing tile {}: ".format(wgs84_tiff))

        num += 1
        print("Image {}".format(str(num)))

        # select vectors that are within the tile
        tif_geom = get_tile_geom(wgs84_tiff)
        sub_gdf = kngdf[kngdf.within(tif_geom)]

        # Skip tiles that do not fully contain any of the polygons
        if sub_gdf.empty:
            continue

        tile_dir = os.path.join(temp_patch_dir, tile_name)
        Path(tile_dir).mkdir(exist_ok=True, parents=True)

        output_filename = tile_name + '_tile_{}-{}.tif'

        with rasterio.open(wgs84_tiff) as inds:

            meta = inds.meta.copy()

            for window, transform in get_tiles(inds, N, N):

                meta['transform'] = transform
                meta['width'], meta['height'] = window.width, window.height

                # Patch file name encodes the column/row offset of the window
                patch_name = output_filename.format(int(window.col_off), int(window.row_off))
                outpath = os.path.join(tile_dir, patch_name)

                with rasterio.open(outpath, 'w', **meta) as outds:
                    outds.write(inds.read(window=window))

                patch_geom = get_tile_geom(outpath)
                patch_gdf = kngdf[kngdf.intersects(patch_geom)]

                if not patch_gdf.empty:

                    # move all subtiles that are inter-sect with the CUSP data to a separate folder, the imageries in this folder will be used
                    # to create training/validation data
                    patch_path = os.path.join(patch_dir, patch_name)

                    shutil.copyfile(outpath, patch_path)


In [151]:
# Switch for this cell: set to True to crop one patch per non-KN polygon
cropping_bounding = False

if cropping_bounding:

    # Reset the switch so the flag reads False once the run has started
    cropping_bounding = False

    for index, row in nonekngdf.iterrows():
        # Source tile: '<stem>.<ext>' -> '<stem>_wgs84.tif' inside base_path
        tile_name = row['location'].split('.')[0] + '_wgs84.tif'
        infile = Path(base_path) / tile_name

        # One output patch per polygon, named after its identical_id
        outfile = os.path.join(patch_dir, 'patch_{}.tif'.format(row['identical_id']))

        crop_img_bounding(row['geometry'], infile, outfile)

In [175]:
# All patch images written by the cropping cells
allfiles = [i for i in os.listdir(patch_dir) if i.endswith('tif')]

# Switch for this cell: set to True to rasterize the label images
labels = False

label_dir = os.path.join(up(path_cur), 'data/HL_NAIP/HL_images/labels_256')

if labels:

    # Reset the switch so the flag reads False once the run has started
    labels = False

    Path(label_dir).mkdir(exist_ok=True, parents=True)
    # List the existing labels once instead of once per patch
    existing_labels = set(os.listdir(label_dir))

    for hres in tqdm(allfiles):

        print('Working on patch {}'.format(hres))

        # Labels that already exist are not regenerated
        if hres in existing_labels:
            continue

        hres_path = os.path.join(patch_dir, hres)
        rst_path = os.path.join(label_dir, hres)

        # Only the metadata of the patch is needed; close the dataset right away
        with rasterio.open(hres_path) as rst:
            meta = rst.meta.copy()
        meta.update(compress='lzw')
        # The label raster has a single band
        meta['count'] = 1

        with rasterio.open(rst_path, 'w+', **meta) as out:
            out_arr = out.read(1)

            # (geom, value) pairs to use in rasterizing
            shapes = zip(gdf.geometry, gdf.marsh_value)

            burned = features.rasterize(shapes=shapes, fill=0, out=out_arr, transform=out.transform)
            out.write_band(1, burned)

In [187]:
# One row per patch image, starting with just the file name
df = pd.DataFrame(data=allfiles, columns=['file_name'])
# Directory holding the label rasters (same file names as the patches)
label_dir = os.path.join(
    up(path_cur),
    'data/HL_NAIP/HL_images/labels_256',
)

In [188]:
# Full path of every patch image and of its label raster
df['file_path'] = df['file_name'].map(lambda name: os.path.join(patch_dir, name))
df['label_path'] = df['file_name'].map(lambda name: os.path.join(label_dir, name))

In [189]:
# 0 for files whose name starts with 'patch', 1 for all others
# (presumably a validation-split flag - TODO confirm)
df['valid'] = df['file_name'].map(lambda name: int(not name.startswith('patch')))

In [195]:
# Display the assembled file index for a visual check
df

Unnamed: 0,file_name,file_path,label_path,valid
0,m_3707645_se_18_060_20181003_wgs84_tile_5632-3...,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1
1,m_3707645_se_18_060_20181003_wgs84_tile_5632-3...,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1
2,m_3707645_se_18_060_20181003_wgs84_tile_5888-2...,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1
3,m_3707645_se_18_060_20181003_wgs84_tile_5888-3...,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1
4,m_3707645_se_18_060_20181003_wgs84_tile_5888-3...,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1
...,...,...,...,...
715,patch_SK8961.tif,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,0
716,patch_SK8962.tif,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,0
717,patch_SK9337.tif,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,0
718,patch_SK9338.tif,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,0


In [194]:
# Persist the file index (without the DataFrame index column) as CSV
csv_path = os.path.join(up(path_cur), 'data/HL_NAIP/HL_transferlearning.csv')
df.to_csv(csv_path, index=False, encoding='utf-8')

Unnamed: 0,file_name,file_path,label_path,valid
0,m_3707645_se_18_060_20181003_wgs84_tile_5632-3...,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1
1,m_3707645_se_18_060_20181003_wgs84_tile_5632-3...,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1
2,m_3707645_se_18_060_20181003_wgs84_tile_5888-2...,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1
3,m_3707645_se_18_060_20181003_wgs84_tile_5888-3...,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1
4,m_3707645_se_18_060_20181003_wgs84_tile_5888-3...,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1
...,...,...,...,...
715,patch_SK8961.tif,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,0
716,patch_SK8962.tif,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,0
717,patch_SK9337.tif,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,0
718,patch_SK9338.tif,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,0
