This notebook samples a specified number of random points from the vegetation (high-NDVI) portion of each selected NAIP scene.

In [1]:
import os
import time
import pandas as pd
import numpy as np

import rasterio
from rasterio.crs import CRS
import rioxarray as rioxr

import geopandas as gpd
from shapely.geometry import mapping
from shapely.geometry import Point 

import random
from random import sample

# custom modules
import raster_to_features as rf

In [2]:
# # initialize seed if sampling new points
# random.seed(50)

In [3]:
# Areas of interest to sample; every name must be a key of the scene lookup `D`
aois = ["point_conception"]
# Number of vegetation points drawn from each scene
n_samples = 140

# Set to True to write the sampled points to disk
save = False
# Base name of the output file; the '.csv' extension is appended when saving
file_name = "sampled_veg_pts"

In [4]:
# ---------------------------------------
# Lookup table: area-of-interest name -> NAIP scene item id
D = dict(
    point_conception='ca_m_3412037_nw_10_060_20200607',
    gaviota='ca_m_3412039_nw_10_060_20200522',
    capitan='ca_m_3412040_ne_10_060_20200522',
    campus_lagoon='ca_m_3411934_sw_11_060_20200521',
    carpinteria='ca_m_3411936_se_11_060_20200521',
)

# Scene ids for the requested areas of interest, in the order given in `aois`
scene_ids = [D[name] for name in aois]

# ---------------------------------------
# Make sure a 'temp' folder exists in the current working directory.
# makedirs(exist_ok=True) creates it when missing and is a no-op when it is
# already there, avoiding the separate exists-then-mkdir check.
temp_fp = os.path.join(os.getcwd(), 'temp')
os.makedirs(temp_fp, exist_ok=True)

# ---------------------------------------
# Read the SB coastal buffer shapefile and turn every geometry into a
# GeoJSON-like mapping so it can be used to clip the NAIP rasters.
# NOTE(review): absolute, user-specific path; consider making it configurable.
fp = (
    '/home/jovyan/msai4earth-esa/iceplant_detection/separating_naip_flights/SB_coastal_buffer/SB_coastal_buffer.shp'
)
coast = gpd.read_file(fp)
coast_geo = coast.geometry.apply(mapping)

# ---------------------------------------
# For every selected scene: build (or reuse) its vegetation raster, draw
# n_samples random vegetation pixels from it, and collect the pixels as
# points in lat/lon (EPSG:4326). One GeoDataFrame per scene ends up in all_points.
all_points = []

# the folder for the cached vegetation rasters must exist before rasterio can write into it
veg_dir = os.path.join(os.getcwd(), 'aoi_vegetation_rasters')
os.makedirs(veg_dir, exist_ok=True)

for itemid in scene_ids:

    filename = 'vegetation_' + itemid + '.tif'
    fp = os.path.join(veg_dir, filename)

    # only build the vegetation raster if it has not been made already
    if not os.path.isfile(fp):

        # open NAIP scene and clip to coast
        raster = rf.rioxr_from_itemid(itemid).rio.clip(coast_geo, coast.crs)

        # ---------------------------------------
        # select pixels with data (blacked out portions have 0 on all bands)
        df = rf.raster_as_df(raster.to_numpy(), ['r','g','b','nir'])
        df = df.loc[(df['nir'] != 0) | (df['r'] != 0) | (df['g'] != 0) | (df['b'] != 0)]

        # find vegetation pixels to go into model
        # keep indices of water and low-ndvi pixels
        # add ndvi and ndwi features for each pixel
        is_veg, water_index, not_veg_index = rf.add_spectral_features(df,
                                                                      ndwi_thresh = 0.3,
                                                                      ndvi_thresh = 0.05)
        # ---------------------------------------
        # rebuild a single-band image: value 1 at the vegetation pixel indices,
        # back_value (100) passed for the rest
        reconstruct = rf.indices_to_image(raster.shape[1],
                                          raster.shape[2],
                                          [is_veg.index],
                                          [1],
                                          back_value=100)
        # save vegetation raster
        with rasterio.open(
            fp,  # file path
            'w',           # w = write
            driver = 'GTiff', # format
            height = reconstruct.shape[0],
            width = reconstruct.shape[1],
            count = 1,  # number of raster bands in the dataset
            dtype = rasterio.uint8,
            crs = raster.rio.crs,
            transform = raster.rio.transform(),
        ) as dst:
            dst.write(reconstruct.astype(rasterio.uint8), 1)

    # ---------------------------------------
    # open vegetation raster
    veg_raster = rioxr.open_rasterio(fp).squeeze()

    # row (y) and column (x) positions of every vegetation pixel (value 1)
    rows, cols = np.nonzero(veg_raster.values == 1)
    n_veg = rows.size

    # fail with a message that names the scene instead of random.sample's generic error
    if n_samples > n_veg:
        raise ValueError(
            f'scene {itemid} has only {n_veg} vegetation pixels; '
            f'cannot sample {n_samples} points from it'
        )

    # sample positions into rows/cols rather than materialising a Python list
    # holding a (row, col) tuple for every vegetation pixel
    picked = sample(range(n_veg), n_samples)

    # x and y coordinates in raster CRS corresponding to sampled points
    # (.values gives plain NumPy floats instead of 0-d DataArrays)
    x_coord = veg_raster.x.values[cols[picked]]
    y_coord = veg_raster.y.values[rows[picked]]

    # make points
    points = [Point(m, n) for m, n in zip(x_coord, y_coord)]

    points_df = gpd.GeoDataFrame({'geometry': points,
                                  'scene' : itemid},
                                 crs = veg_raster.rio.crs)
    all_points.append(points_df.to_crs(CRS.from_epsg(4326)))  # change to lat/lon crs

In [5]:
# Stack the per-scene samples into one table, expose each point's longitude and
# latitude as plain columns, then discard the geometry column.
points_df = (
    pd.concat(all_points, ignore_index=True)
    .assign(
        LON=lambda pts: pts.geometry.x,
        LAT=lambda pts: pts.geometry.y,
    )
    .drop(columns=['geometry'])
)

In [6]:
# Write the sampled points to temp/<file_name>.csv, but only when saving is enabled;
# the row index is stored in a column labelled PLOTID.
if save:
    csv_name = file_name + '.csv'
    fp = os.path.join(os.getcwd(), 'temp', csv_name)
    points_df.to_csv(fp, index_label='PLOTID')

In [7]:
# display the sampled points (columns: scene, LON, LAT)
points_df

Unnamed: 0,scene,LON,LAT
0,ca_m_3412037_nw_10_060_20200607,-120.456706,34.449457
1,ca_m_3412037_nw_10_060_20200607,-120.470889,34.474552
2,ca_m_3412037_nw_10_060_20200607,-120.467985,34.461489
3,ca_m_3412037_nw_10_060_20200607,-120.463142,34.453004
4,ca_m_3412037_nw_10_060_20200607,-120.461755,34.452645
...,...,...,...
135,ca_m_3412037_nw_10_060_20200607,-120.473037,34.471968
136,ca_m_3412037_nw_10_060_20200607,-120.471394,34.455204
137,ca_m_3412037_nw_10_060_20200607,-120.471305,34.468319
138,ca_m_3412037_nw_10_060_20200607,-120.465381,34.458833


In [11]:
# Write the sampled points to temp/<file_name>.csv with the row index labelled PLOTID.
# Guarded by the `save` flag from the configuration cell: this cell used to write
# the file unconditionally, so a full re-run saved the CSV even with save = False.
if save:
    fp = os.path.join(os.getcwd(), 'temp', file_name + '.csv')
    points_df.to_csv(fp, index_label='PLOTID')