In [1]:
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import rasterio
import shapely
import os

from shapely.geometry import shape
from shapely.geometry import Point

import pystac_client
import planetary_computer as pc

import random
random.seed(10)

import sample_rasters as pp
import utility

In [2]:
# Resolve the path of the 2020 `campus_lagoon` polygon shapefile through the
# project helper module (`sample_rasters`, imported as `pp`) and display it.
polys_path = pp.path_to_polygons('campus_lagoon', 2020)
polys_path

'/home/jovyan/msai4earth-esa/iceplant_detection/data_sampling_workflow/polygons_from_naip_images/campus_lagoon_polygons/campus_lagoon_polygons_2020/campus_lagoon_polygons_2020.shp'

In [3]:
# Read the polygon shapefile into a GeoDataFrame and preview the first rows.
# Each row carries the polygon id, its `iceplant` class label, the date fields
# and the id of the NAIP scene it was drawn on.
polys = gpd.read_file(polys_path)
polys.head()

Unnamed: 0,id,iceplant,year,month,day,naip_id,geometry
0,0,1,2020,5,21,ca_m_3411934_sw_11_060_20200521,"POLYGON ((-119.84479 34.40550, -119.84464 34.4..."
1,1,1,2020,5,21,ca_m_3411934_sw_11_060_20200521,"POLYGON ((-119.84545 34.40557, -119.84541 34.4..."
2,2,1,2020,5,21,ca_m_3411934_sw_11_060_20200521,"POLYGON ((-119.84474 34.40584, -119.84476 34.4..."
3,3,1,2020,5,21,ca_m_3411934_sw_11_060_20200521,"POLYGON ((-119.84429 34.40522, -119.84421 34.4..."
4,4,1,2020,5,21,ca_m_3411934_sw_11_060_20200521,"POLYGON ((-119.84671 34.40607, -119.84655 34.4..."


In [4]:
polys.shape

(29, 7)

In [5]:
type(polys.geometry[0])

shapely.geometry.polygon.Polygon

In [6]:
# Open NAIP scene
# Look up one NAIP item by id in the Planetary Computer STAC catalog, sign the
# href of its "image" asset and open it with rasterio.
URL = "https://planetarycomputer.microsoft.com/api/stac/v1"
catalog = pystac_client.Client.open(URL)

# Same scene id as the `naip_id` column of `polys`.
itemid = 'ca_m_3411934_sw_11_060_20200521'
search = catalog.search(
    collections=["naip"],
    ids = itemid
)
# Takes the first hit; an empty search result raises IndexError here.
item = list(search.get_items())[0]
# NOTE(review): this reader is never closed in the notebook and the name is
# rebound in a later cell — consider closing it explicitly.
rast_reader = rasterio.open(pc.sign(item.assets["image"].href))
rast_reader

<open DatasetReader name='https://naipeuwest.blob.core.windows.net/naip/v002/ca/2020/ca_060cm_2020/34119/m_3411934_sw_11_060_20200521.tif?st=2022-08-02T20%3A47%3A15Z&se=2022-08-10T20%3A47%3A15Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2022-08-03T20%3A47%3A14Z&ske=2022-08-10T20%3A47%3A14Z&sks=b&skv=2021-06-08&sig=7GIuuqCRbcxjkXlVXx4dF80gjef1BmsLI7WU0U9bk/0%3D' mode='r'>

In [7]:
type(rast_reader)

rasterio.io.DatasetReader

In [8]:
def count_pixels_in_polygons(polys, rast_reader):
    """Estimate how many raster pixels each polygon covers.

    The estimate is the polygon area divided by the area of one raster pixel,
    truncated to an integer. Polygons whose CRS differs from the raster's are
    reprojected to the raster CRS first (and 'matched crs' is printed).

    Parameters
    ----------
    polys : geopandas.GeoDataFrame
        Polygons to measure; needs `.crs`, `.to_crs()` and `.geometry`.
    rast_reader : rasterio.io.DatasetReader
        Open raster; only its `.crs` and `.res` are read.

    Returns
    -------
    numpy.ndarray
        One integer pixel estimate per polygon, in row order.
    """
    raster_crs = rast_reader.crs

    # Areas are only comparable to the pixel size when both share a CRS.
    if polys.crs != raster_crs:
        print('matched crs')
        polys = polys.to_crs(raster_crs)

    # Area of one pixel, from the raster resolution along each axis.
    x_res = rast_reader.res[0]
    y_res = rast_reader.res[1]
    pixel_area = x_res * y_res

    def _whole_pixels(geom):
        # int() truncates, so partially covered pixels are not counted.
        return int(geom.area / pixel_area)

    per_polygon = polys.geometry.apply(_whole_pixels)
    return per_polygon.to_numpy()

In [9]:
# Estimate the pixel count of every polygon. `polys` is passed in its original
# CRS, so the function reprojects it internally (hence the 'matched crs' message).
n_pixels = count_pixels_in_polygons(polys, rast_reader)

matched crs


In [10]:
type(n_pixels)

numpy.ndarray

In [11]:
n_pixels

array([ 2903,   520,   502,   222,   513,   504,   408,   451,   232,
         517,  1975,   994, 14061,  6576,  4744, 11158,  5473,  4297,
        8241,  6081, 29932, 81147,  9219,  6400, 19735, 27898, 91206,
       41306, 45081])

In [12]:
n_pixels.shape

(29,)

In [13]:
# Sanity check: reprojecting the polygons beforehand yields the same counts,
# and the function then skips its own reprojection (no 'matched crs' message).
polys_match = polys.to_crs(rast_reader.crs)
count_pixels_in_polygons(polys_match, rast_reader)

array([ 2903,   520,   502,   222,   513,   504,   408,   451,   232,
         517,  1975,   994, 14061,  6576,  4744, 11158,  5473,  4297,
        8241,  6081, 29932, 81147,  9219,  6400, 19735, 27898, 91206,
       41306, 45081])

In [14]:
def sample_size_in_polygons(n_pixels, param, sample_fraction=0, max_sample=0, const_sample=0):
    """Decide how many points to sample from each polygon.

    Parameters
    ----------
    n_pixels : array-like of int
        Estimated number of pixels in each polygon (1-D). Lists and tuples
        are accepted as well as numpy arrays; the input is never modified.
    param : {'fraction', 'sliding', 'constant'}
        Sampling mode:
        - 'fraction': `sample_fraction * n_pixels` points per polygon.
        - 'sliding' : same as 'fraction' but capped at `max_sample`.
        - 'constant': `const_sample` points for every polygon.
    sample_fraction : float, optional
        Fraction of the pixels to sample ('fraction' and 'sliding' modes).
    max_sample : int, optional
        Upper bound on points per polygon ('sliding' mode only).
    const_sample : int, optional
        Points per polygon ('constant' mode only).

    Returns
    -------
    numpy.ndarray of int, or None
        Number of points per polygon (fractions are truncated). If `param` is
        not one of the valid modes a message is printed and None is returned.

    Warns
    -----
    UserWarning
        When more points are requested for some polygon than its estimated
        pixel count.
    """
    import warnings  # local import so the cell does not depend on a new top-level import

    if param not in ('fraction', 'sliding', 'constant'):
        print('not valid parameter: param must be `fraction`, `sliding` or `constant`')
        return

    # Accept any array-like; arithmetic below always builds a new array.
    n_pixels = np.asarray(n_pixels)

    if param == 'constant':
        n_pts = np.full(n_pixels.shape[0], const_sample)
    else:
        n_pts = sample_fraction * n_pixels
        if param == 'sliding':
            # Cap the per-polygon sample size.
            n_pts = np.minimum(n_pts, max_sample)

    n_pts = n_pts.astype('int')

    # Sampling more points than there are pixels produces duplicated pixels.
    if n_pts.shape == n_pixels.shape and np.any(n_pts > n_pixels):
        warnings.warn(
            'requested sample size exceeds the estimated number of pixels '
            'for at least one polygon',
            stacklevel=2,
        )

    return n_pts

In [15]:
# 'fraction' mode: sample half of the estimated pixels of each polygon
# (max_sample and const_sample are not used by this mode).
param = 'fraction'
num_random_pts = sample_size_in_polygons(n_pixels, param, sample_fraction=0.5, max_sample = 1000, const_sample = 10)
num_random_pts

array([ 1451,   260,   251,   111,   256,   252,   204,   225,   116,
         258,   987,   497,  7030,  3288,  2372,  5579,  2736,  2148,
        4120,  3040, 14966, 40573,  4609,  3200,  9867, 13949, 45603,
       20653, 22540])

In [16]:
# 'sliding' mode: half of the estimated pixels of each polygon, capped at
# max_sample (1000) points (const_sample is not used by this mode).
param = 'sliding'
num_random_pts = sample_size_in_polygons(n_pixels, param, sample_fraction=0.5, max_sample = 1000, const_sample = 10)
num_random_pts

array([1000,  260,  251,  111,  256,  252,  204,  225,  116,  258,  987,
        497, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
       1000, 1000, 1000, 1000, 1000, 1000, 1000])

In [17]:
# 'constant' mode: const_sample (10) points for every polygon regardless of
# its size (sample_fraction and max_sample are not used by this mode).
param = 'constant'
num_random_pts = sample_size_in_polygons(n_pixels, param, sample_fraction=0.5, max_sample = 1000, const_sample = 10)
num_random_pts

array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
       10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10])

In [31]:
def random_pts_poly(N, polygon):
    """Draw N random points that lie inside `polygon`.

    Uses rejection sampling: candidates are drawn uniformly from the polygon's
    bounding box with the global `random` module (so `random.seed` controls
    reproducibility) and kept only when the polygon contains them.

    Parameters
    ----------
    N : int
        Number of points to generate. N <= 0 returns an empty list.
    polygon : shapely geometry
        Polygon to sample from; must have a non-zero area when N > 0.

    Returns
    -------
    list of shapely.geometry.Point

    Raises
    ------
    ValueError
        If N > 0 and the polygon is empty or has zero area; rejection
        sampling could never accept a point and would loop forever.
    """
    if N <= 0:
        return []

    if polygon.is_empty or polygon.area == 0:
        raise ValueError('cannot sample points from an empty or zero-area polygon')

    min_x, min_y, max_x, max_y = polygon.bounds

    points = []
    while len(points) < N:
        # x is drawn before y to keep the random stream order stable.
        candidate = Point(random.uniform(min_x, max_x), random.uniform(min_y, max_y))
        if polygon.contains(candidate):
            points.append(candidate)
    return points

# ----------------------------

def sample_raster_from_poly(N, poly, poly_id, class_name, poly_class, rast_reader, rast_band_names, rast_crs):
    """Sample raster band values at N random points inside one polygon.

    Parameters
    ----------
    N : int
        Number of random points to draw. N <= 0 returns an empty frame.
    poly : shapely polygon
        Polygon to sample from, in the same CRS as the raster.
    poly_id : scalar
        Identifier copied to every row (`polygon_id` column).
    class_name : str
        Name of the column that holds the class label.
    poly_class : scalar
        Class label copied to every row.
    rast_reader : rasterio.io.DatasetReader
        Open raster; its `.sample()` method supplies the band values.
    rast_band_names : list of str
        Column names for the sampled bands, in band order.
    rast_crs : str
        CRS label copied to every row (`crs` column).

    Returns
    -------
    pandas.DataFrame
        Columns: x, y, crs, polygon_id, <class_name>, then the band columns.
    """
    # TO DO: add catch when polygon and raster do not intersect
    rast_band_names = list(rast_band_names)
    columns = ['x', 'y', 'crs', 'polygon_id', class_name] + rast_band_names

    # Nothing to sample: np.vstack cannot stack an empty list of rows.
    if N <= 0:
        return pd.DataFrame(columns=columns)

    points = random_pts_poly(N, poly)      # select random points inside poly
    coords = [(p.x, p.y) for p in points]  # plain (x, y) tuples for DatasetReader.sample()

    # One row of band values per sampled point.
    band_values = np.vstack(list(rast_reader.sample(coords)))
    sample = pd.DataFrame(band_values, columns=rast_band_names)

    # Coordinates are stored as plain floats; geometries are kept out of pandas
    # to avoid the shapely array-conversion warning.
    sample['x'] = [x for x, _ in coords]
    sample['y'] = [y for _, y in coords]
    sample['crs'] = rast_crs          # CRS of the point coordinates
    sample['polygon_id'] = poly_id
    sample[class_name] = poly_class   # class identification for all pts

    return sample[columns]            # organize columns

In [19]:
# Fetch the item and raster for `itemid` through the project's `utility`
# helpers (presumably equivalent to the manual STAC search earlier in the
# notebook — confirm in `utility`), then reproject the polygons to the raster CRS.
item = utility.get_item_from_id(itemid)
rast_reader = utility.get_raster_from_item(item)
polys_match = polys.to_crs(rast_reader.crs)


In [25]:
# NOTE(review): `sample` is only assigned in a cell further down (In [34]);
# on a fresh Restart & Run All this cell raises NameError. Move it below
# that cell or drop it.
type(sample)

pandas.core.frame.DataFrame

In [34]:
# Try the sampler on a single polygon: 10 random points from the first polygon.
i=0
N = 10
poly = polys_match['geometry'][i]    # TO DO: put all these as parameters inside the function
poly_id = polys['id'][i]
poly_class = polys['iceplant'][i]
class_name = 'iceplant'
rast_band_names = ['r','g','b','nir']
# CRS label stored with every sampled point (shown as 'epsg:26911' in the output).
rast_crs =  rast_reader.crs.to_dict()['init']

sample = sample_raster_from_poly(N, poly, poly_id,class_name,  poly_class, rast_reader, rast_band_names, rast_crs)

  arr = construct_1d_object_array_from_listlike(values)


In [35]:
sample

Unnamed: 0,x,y,crs,polygon_id,iceplant,r,g,b,nir
0,238496.598876,3810774.0,epsg:26911,0,1,75,86,71,150
1,238494.015166,3810789.0,epsg:26911,0,1,76,83,68,160
2,238496.245333,3810791.0,epsg:26911,0,1,69,88,66,173
3,238487.392512,3810780.0,epsg:26911,0,1,83,88,71,147
4,238496.914501,3810770.0,epsg:26911,0,1,85,91,77,147
5,238503.685039,3810781.0,epsg:26911,0,1,75,77,67,140
6,238475.708049,3810789.0,epsg:26911,0,1,80,84,71,149
7,238509.782466,3810770.0,epsg:26911,0,1,74,76,70,141
8,238495.908246,3810774.0,epsg:26911,0,1,87,92,76,144
9,238476.359331,3810782.0,epsg:26911,0,1,85,88,76,136


In [41]:
def sample_naip(polys, class_name, itemid, param, sample_fraction=0, max_sample=0, const_sample=0):
    """Sample a NAIP scene at random points inside every polygon.

    Parameters
    ----------
    polys : geopandas.GeoDataFrame
        Polygons to sample; must have an `id` column and a `class_name` column.
    class_name : str
        Name of the column in `polys` holding the class label.
    itemid : str
        Id of the NAIP item to sample.
    param : {'fraction', 'sliding', 'constant'}
        Sampling mode, forwarded to `sample_size_in_polygons`.
    sample_fraction, max_sample, const_sample : optional
        Mode parameters, forwarded to `sample_size_in_polygons`.

    Returns
    -------
    pandas.DataFrame
        One row per sampled point (coordinates, CRS, polygon id, class and
        band values) plus `year`, `month`, `day_in_year` and `naip_id`.
        The index is not reset, so rows of each polygon are numbered from 0.

    Raises
    ------
    ValueError
        If `param` is not a valid sampling mode.
    """
    item = utility.get_item_from_id(itemid)
    rast_reader = utility.get_raster_from_item(item)

    try:
        rast_band_names = ['r', 'g', 'b', 'nir']
        rast_crs = rast_reader.crs.to_dict()['init']

        polys_match = polys.to_crs(rast_reader.crs)

        n_pixels = count_pixels_in_polygons(polys_match, rast_reader)
        n_pts = sample_size_in_polygons(n_pixels, param, sample_fraction, max_sample, const_sample)
        if n_pts is None:
            # sample_size_in_polygons signals an invalid mode by returning None.
            raise ValueError('param must be `fraction`, `sliding` or `constant`')

        # n_pts is positional, so pair it with the rows by position rather than
        # by index label (labels need not be 0..n-1, e.g. after filtering).
        per_polygon = zip(n_pts, polys_match.geometry, polys['id'], polys[class_name])

        samples = []
        for n, geom, poly_id, poly_class in per_polygon:   # for each polygon in set
            if n <= 0:
                continue  # nothing to sample from this polygon
            samples.append(
                sample_raster_from_poly(n, geom, poly_id,
                                        class_name, poly_class,
                                        rast_reader, rast_band_names, rast_crs)
            )
    finally:
        # Release the dataset handle opened above (assumes the helper returns
        # an open rasterio dataset, as its use in this notebook suggests).
        rast_reader.close()

    if samples:
        df = pd.concat(samples)  # create dataframe from samples list
    else:
        # No polygon produced any sample: return an empty, correctly labelled frame.
        df = pd.DataFrame(columns=['x', 'y', 'crs', 'polygon_id', class_name] + rast_band_names)

    df['year'] = item.datetime.year   # add date to samples  TO DO: get from polys? raster?
    df['month'] = item.datetime.month
    df['day_in_year'] = utility.day_in_year(item.datetime.day, item.datetime.month, item.datetime.year)
    df['naip_id'] = itemid            # add naip item id to samples

    return df

In [49]:
list(polys.index)

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28]