In [89]:
# This script is used to pre-process the cropped border patches
import os
import numpy as np
import pandas as pd
import rasterio
import sys
import geopandas as gpd
from pathlib import Path
from tqdm import tqdm
from rasterio.mask import mask, raster_geometry_mask
from shapely.geometry import box
from rasterio.enums import Resampling
from itertools import product
from rasterio import windows
import rioxarray
import shapely
import shutil
from osgeo import gdal

# Absolute path of the current working directory, also added to the module
# search path.
path_cur = os.path.abspath(os.curdir)
sys.path.append(path_cur)

from os.path import dirname as up

In [90]:
# Input label table and the output path for the validated table. Both are
# resolved under the directory two levels above the working directory.
label_file = Path(up(up(path_cur)), 'MarshMapping_local/data/NAIP_data_processing/labels_tmi_modified.csv')
out_file = Path(up(up(path_cur)), 'MarshMapping_local/data/NAIP_data_processing/labels_tmi_modified_valid.csv')

In [91]:
# Load the label table; later cells open the paths stored in its 'patch_name'
# and 'label' columns as rasters.
df = pd.read_csv(label_file)

In [92]:
# Sanity check: show the (bands, rows, cols) shape of the first patch.
testfile = df['patch_name'].iloc[0]
# The context manager closes the dataset handle, and the shape is taken from
# the dataset metadata so no pixel data has to be read.
with rasterio.open(testfile) as src:
    testshape = (src.count, src.height, src.width)
testshape

(4, 256, 256)

In [111]:
def filter_image_size(img, size=256):
    """Check whether a raster patch is exactly ``size`` x ``size`` pixels.

    Parameters
    ----------
    img : str or path-like
        Path of the raster, passed to ``rasterio.open``.
    size : int, optional
        Required number of rows and columns. Defaults to 256.

    Returns
    -------
    int
        1 if both the height and the width equal ``size``, otherwise 0.
    """
    # Height/width come from the dataset metadata, so no pixel data is read,
    # and the context manager closes the file handle.
    with rasterio.open(img) as src:
        return 1 if (src.height == size and src.width == size) else 0

def filter_image_zero(img):
    """Check that a raster contains no zero-valued pixels in any band.

    Parameters
    ----------
    img : str or path-like
        Path of the raster, passed to ``rasterio.open``.

    Returns
    -------
    int
        0 if at least one pixel in any band equals 0, otherwise 1.
    """
    # Read all bands, then release the file handle before analysing the data.
    with rasterio.open(img) as src:
        image = src.read()

    # NOTE(review): zeros presumably mark missing data -- confirm.
    return 0 if np.any(image == 0) else 1

def calculate_weights(img, val):
    """Count the pixels in band 1 of a raster that equal ``val``.

    Despite the name, this returns a raw pixel count, not a weight.

    Parameters
    ----------
    img : str or path-like
        Path of the raster, passed to ``rasterio.open``.
    val : int or int-convertible
        Pixel value to count; converted with ``int(val)`` before comparing.

    Returns
    -------
    int
        Number of band-1 pixels equal to ``int(val)``.
    """
    # Only the first band is read; the context manager closes the handle.
    with rasterio.open(img) as src:
        image_array = src.read(1)

    # Count matches directly instead of materialising the selected pixels.
    return int(np.count_nonzero(image_array == int(val)))


def get_patch_name(img):
    """Return the final path component (the file name) of ``img``."""
    _, file_name = os.path.split(img)
    return file_name
        
        

In [94]:
# Flag every patch raster: 1 when it passes the check, 0 when it fails.
df['filter_size'] = df['patch_name'].apply(filter_image_size)
df['filter_zero'] = df['patch_name'].apply(filter_image_zero)

In [95]:
# Per-label-raster pixel counts for value 0 ('none_marsh') and value 1 ('marsh').
df['none_marsh'] = df['label'].apply(calculate_weights, args=(0,))
df['marsh'] = df['label'].apply(calculate_weights, args=(1,))

In [96]:
# Display the table with the filter flags and pixel-count columns added.
df

Unnamed: 0,patch_name,label,filter_size,filter_zero,none_marsh,marsh
0,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,65536,0
1,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,43651,21885
2,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,57891,7645
3,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,60878,4658
4,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,64032,1504
...,...,...,...,...,...,...
3870,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,60930,4606
3871,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,56441,9095
3872,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,53419,12117
3873,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,48535,17001


In [103]:
# A row is valid (1) only when the patch passes both the size and the zero
# filters and its label raster has at least one pixel of each class;
# otherwise it is flagged 0. Computed column-wise instead of row by row.
valids = (
    (df['filter_size'] == 1)
    & (df['filter_zero'] == 1)
    & (df['none_marsh'] > 0)
    & (df['marsh'] > 0)
).astype(int).tolist()


In [104]:
# Store the per-row validity flags (1 = valid, 0 = rejected) as a column.
df['valid'] = valids

In [105]:
# Total pixels of each class, summed over the valid rows only.
none_marsh_count = df.loc[df['valid'] == 1, 'none_marsh'].sum()
marsh_count = df.loc[df['valid'] == 1, 'marsh'].sum()

In [120]:
# Share of all valid-patch pixels in each class, ordered [none_marsh, marsh].
# NOTE(review): these are class frequencies, not inverse frequencies --
# confirm this is what the consumer of `weights` expects.
weights = [
    class_count / (none_marsh_count + marsh_count)
    for class_count in (none_marsh_count, marsh_count)
]

In [121]:
# Display the two class fractions: [none_marsh, marsh].
weights

[0.8345623386613673, 0.16543766133863266]

In [108]:
# Rename the two full-path columns; this frees the 'patch_name' column name,
# which the next cell re-creates as the bare file name.
df = df.rename(columns={"patch_name": "patch_path", "label": "label_path"})

In [112]:
# File name (last path component) of every patch.
df['patch_name'] = df['patch_path'].apply(get_patch_name)

In [113]:
# Display the table after the rename and the new 'patch_name' column.
df

Unnamed: 0,patch_path,label_path,filter_size,filter_zero,none_marsh,marsh,valid,patch_name
0,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,65536,0,0,m_3607612_ne_18_060_20180830_wgs84_tile_2816-6...
1,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,43651,21885,1,m_3607612_ne_18_060_20180830_wgs84_tile_6144-1...
2,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,57891,7645,1,m_3607612_ne_18_060_20180830_wgs84_tile_6912-5...
3,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,60878,4658,1,m_3607501_sw_18_060_20180827_wgs84_tile_256-11...
4,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,64032,1504,1,m_3607501_sw_18_060_20180827_wgs84_tile_768-11...
...,...,...,...,...,...,...,...,...
3870,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,60930,4606,1,m_3807764_nw_18_060_20180827_wgs84_tile_10240-...
3871,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,56441,9095,1,m_3807764_nw_18_060_20180827_wgs84_tile_11008-...
3872,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,53419,12117,1,m_3807764_nw_18_060_20180827_wgs84_tile_11264-...
3873,/rapids/notebooks/sciclone/geograd/Miranda/git...,/rapids/notebooks/sciclone/geograd/Miranda/git...,1,1,48535,17001,1,m_3807764_nw_18_060_20180827_wgs84_tile_11520-...


In [117]:
# Move 'patch_name' to the front by name instead of by hard-coded positions,
# so every other column is kept in its current order and re-running the cell
# leaves the layout unchanged.
df = df[['patch_name'] + [col for col in df.columns if col != 'patch_name']]

In [119]:
# Write the table to out_file as UTF-8 CSV, omitting the row index.
df.to_csv(out_file, encoding='utf-8', index=False)