In [1]:
import os
import gc   # garbage collector
import time
import pandas as pd
import numpy as np

import rasterio
import rioxarray as rioxr

import geopandas as gpd
from shapely.geometry import mapping


# custom modules
import raster_to_features as rf
import data_sampling_workflow.sample_rasters as sr

from random import sample

from shapely.geometry import Point 

from rasterio.crs import CRS

In [2]:
# ---------------------------------------
# Read the Santa Barbara coastal buffer shapefile and convert each geometry to
# a GeoJSON-like mapping so it can be used for clipping rasters.
fp = os.path.join(os.getcwd(),
                  'separating_naip_flights',
                  'SB_coastal_buffer',
                  'SB_coastal_buffer.shp')
coast = gpd.read_file(fp)
coast_geo = coast.geometry.apply(mapping)

# ---------------------------------------
# item ids of the NAIP scenes that points will be sampled from
scene_ids = ['ca_m_3412037_nw_10_060_20200607',
             'ca_m_3412039_nw_10_060_20200522',
             'ca_m_3412040_ne_10_060_20200522',
             'ca_m_3411934_sw_11_060_20200521',
             'ca_m_3411936_se_11_060_20200521']

# ---------------------------------------
# Make sure the temp folder exists. `exist_ok=True` makes this cell safe to
# re-run and avoids the race between checking for the folder and creating it.
temp_fp = os.path.join(os.getcwd(), 'temp')
os.makedirs(temp_fp, exist_ok=True)

In [6]:
n_samples = 100   # number of points drawn from each scene
year = 2020       # year label used in the output CSV file name
seed = 42         # fixed seed so re-running the notebook draws the same points

rng = np.random.default_rng(seed)

# one GeoDataFrame of sampled points (in EPSG:4326) per scene
all_points = []

for itemid in scene_ids:
    # ---------------------------------------
    # The commented-out code below is the recipe that produced the
    # 'vegetation_<itemid>.tif' rasters read further down. It is kept for
    # reference only; the rasters must already exist in the temp folder.
    #
    # open NAIP scene and clip to coast
    # raster = rf.rioxr_from_itemid(itemid).rio.clip(coast_geo, coast.crs)
    #
    # select pixels with data (blacked out portions have 0 on all bands)
    # df = rf.raster_as_df(raster.to_numpy(), ['r','g','b','nir'])
    # df = df.loc[ (df['nir'] != 0) | (df['r'] != 0) | (df['g'] != 0) | (df['b'] != 0)]
    #
    # find vegetation pixels to go into model
    # keep indices of water and low-ndvi pixels
    # add ndvi and ndwi features for each pixel
    # is_veg, water_index, not_veg_index = rf.add_spectral_features(df,
    #                                                               ndwi_thresh = 0.3,
    #                                                               ndvi_thresh = 0.05)
    #
    # reconstruct = rf.indices_to_image(raster.shape[1],
    #                                   raster.shape[2],
    #                                   [is_veg.index],
    #                                   [1],
    #                                   back_value=100)
    #
    # with rasterio.open(
    #     fp,  # file path
    #     'w',           # w = write
    #     driver = 'GTiff', # format
    #     height = reconstruct.shape[0],
    #     width = reconstruct.shape[1],
    #     count = 1,  # number of raster bands in the dataset
    #     dtype = rasterio.uint8,
    #     crs = raster.rio.crs,
    #     transform = raster.rio.transform(),
    # ) as dst:
    #     dst.write(reconstruct.astype(rasterio.uint8), 1)
    # ---------------------------------------

    filename = 'vegetation_' + itemid + '.tif'
    fp = os.path.join(temp_fp, filename)

    # fail early with a clear message, since the code that writes these
    # rasters is not executed in this cell
    if not os.path.exists(fp):
        raise FileNotFoundError(
            'Vegetation raster not found: ' + fp +
            '. Generate it first (see the commented-out recipe in this cell).')

    veg_raster = rioxr.open_rasterio(fp).squeeze()

    # row (y) and column (x) positions of the pixels whose value is 1
    rows, cols = np.where(veg_raster.values == 1)

    # sampling is without replacement, so the scene needs enough candidates
    n_available = rows.size
    if n_available < n_samples:
        raise ValueError(
            f'Scene {itemid} has only {n_available} pixels with value 1; '
            f'cannot draw {n_samples} samples.')

    # draw positions into the candidate arrays instead of building a Python
    # list of every (y, x) pair, which is very large for a full scene
    chosen = rng.choice(n_available, size=n_samples, replace=False)
    y = rows[chosen]
    x = cols[chosen]

    # x and y coordinates in raster CRS corresponding to sampled points
    # (plain numpy arrays so Point receives floats, not 0-d DataArrays)
    x_coord = veg_raster.x.values[x]
    y_coord = veg_raster.y.values[y]

    # make points
    points = [Point(px, py) for px, py in zip(x_coord, y_coord)]

    points_df = gpd.GeoDataFrame({'geometry': points,
                                  'scene' : itemid},
                                 crs = veg_raster.rio.crs)
    all_points.append(points_df.to_crs(CRS.from_epsg(4326)))  # change to lat/lon crs

In [4]:
# Stack the per-scene samples into one table, expose the point coordinates as
# LON / LAT columns, and drop the geometry column.
stacked_points = pd.concat(all_points, ignore_index=True)
points_df = (
    stacked_points
    .assign(LON=stacked_points.geometry.x,
            LAT=stacked_points.geometry.y)
    .drop(columns=['geometry'])
)

Unnamed: 0,scene,LON,LAT
0,ca_m_3412037_nw_10_060_20200607,-120.484881,34.497110
1,ca_m_3412037_nw_10_060_20200607,-120.465460,34.452987
2,ca_m_3412037_nw_10_060_20200607,-120.470438,34.450327
3,ca_m_3412037_nw_10_060_20200607,-120.489551,34.499965
4,ca_m_3412037_nw_10_060_20200607,-120.468204,34.464625
...,...,...,...
295,ca_m_3411936_se_11_060_20200521,-119.555028,34.420658
296,ca_m_3411936_se_11_060_20200521,-119.497281,34.387493
297,ca_m_3411936_se_11_060_20200521,-119.554416,34.420487
298,ca_m_3411936_se_11_060_20200521,-119.556153,34.421364


In [7]:
# Save the sampled points; the row index is written under the PLOTID column.
out_csv = f'veg_train_pts_{year}.csv'
points_df.to_csv(out_csv, index_label='PLOTID')