In [3]:
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import rasterio
import shapely
import os

from shapely.geometry import shape
from shapely.geometry import Point

import pystac_client
import planetary_computer as pc

import random
random.seed(10)

import sample_rasters as pp
import utility
import lidar_sampling_functions as lsf

from rasterio.crs import CRS

from scipy.ndimage import convolve as conf2D
from scipy.ndimage import maximum_filter as maxf2D


In [5]:
# Area of interest and year that identify which point sample to load.
aoi = 'campus_lagoon'
year = 2020
# CSV of sampled points for this aoi/year, looked up under ./temp of the working directory.
pts_fp = os.path.join(os.getcwd(),
                      'temp',
                      aoi+'_points_'+str(year)+'.csv')
# The CRS of the points is stored as a string in the pts_crs column and only the
# first row's value is used, so read just that one cell instead of the whole file.
crs = CRS.from_string(pd.read_csv(pts_fp, usecols=['pts_crs'], nrows=1).pts_crs[0])

In [6]:
def path_to_lidar(year, root=None):
    """Return the file path of the Santa Barbara County lidar raster for a year.

    Parameters
    ----------
    year : int or str
        Year of the raster; it is converted with str() and embedded in the file name.
    root : str, optional
        Directory holding the rasters. When omitted, the original hard-coded
        location of the Santa Barbara County canopy height rasters is used,
        so existing calls behave exactly as before.

    Returns
    -------
    str
        ``<root>/SantaBarbaraCounty_lidar_<year>.tif``. The path is only built,
        not checked for existence.
    """
    if root is None:
        # root for all Santa Barbara County canopy height rasters
        root = '/home/jovyan/msai4earth-esa/iceplant_detection/data_sampling_workflow/SantaBarbaraCounty_lidar/'
    return os.path.join(root, 'SantaBarbaraCounty_lidar_' + str(year) + '.tif')

In [7]:
# Open the lidar raster for `year`. The path comes from the lidar_sampling_functions
# module (lsf), not from the path_to_lidar function defined in this notebook.
# NOTE(review): the reader is not closed in any of the visible cells — confirm this is intended.
rast_reader_lidar = rasterio.open(lsf.path_to_lidar(year))

In [11]:
# Inspect the pixel data type reported in the lidar raster's metadata.
rast_reader_lidar.meta['dtype']

'int16'

In [5]:
def geodataframe_from_csv(fp, lon_label, lat_label, crs):
    """Load a CSV of point coordinates into a GeoDataFrame of shapely Points.

    Parameters
    ----------
    fp : str
        File path of the CSV to read.
    lon_label : str
        Name of the CSV column holding the x coordinate of each point.
    lat_label : str
        Name of the CSV column holding the y coordinate of each point.
    crs :
        CRS the coordinates in the CSV are expressed in. It is attached to the
        result as-is; no reprojection is performed.

    Returns
    -------
    geopandas.GeoDataFrame
        All columns of the CSV plus a 'geometry' column of Points built from
        (lon_label, lat_label). A pre-existing 'geometry' column in the CSV is
        kept under the name 'geometry_0'.
    """
    df = pd.read_csv(fp)
    if 'geometry' in df.columns:           # rename geometry column if it exists
        df = df.rename(columns={'geometry': 'geometry_0'})

    # Build the Points in one vectorised call and hand them to GeoDataFrame
    # explicitly, instead of storing a Python list of shapely objects in a
    # plain DataFrame column first.
    geometry = gpd.points_from_xy(df[lon_label], df[lat_label])
    return gpd.GeoDataFrame(df, geometry=geometry, crs=crs)

In [6]:
# Build the point GeoDataFrame from the CSV using its 'x' and 'y' columns, then preview two rows.
pts = geodataframe_from_csv(pts_fp, 'x','y', crs)
pts.head(2)

  arr = construct_1d_object_array_from_listlike(values)


Unnamed: 0,x,y,pts_crs,polygon_id,iceplant,r,g,b,nir,year,month,day_in_year,naip_id,aoi,ndvi,geometry
0,238497.134472,3810765.0,epsg:26911,0,1,88,97,74,154,2020,5,142,ca_m_3411934_sw_11_060_20200521,campus_lagoon,0.272727,POINT (238497.134 3810764.766)
1,238506.444438,3810762.0,epsg:26911,0,1,90,90,70,156,2020,5,142,ca_m_3411934_sw_11_060_20200521,campus_lagoon,0.268293,POINT (238506.444 3810761.911)


In [7]:
def sample_raster_from_pts(pts, rast_reader, rast_band_names):
    """Sample every band of a raster at the given points.

    Parameters
    ----------
    pts : iterable of point objects exposing ``.x`` and ``.y``
        (e.g. a GeoSeries of shapely Points). The points must already be in
        the same CRS as ``rast_reader``; no reprojection happens here.
    rast_reader : open raster reader
        Must expose ``count`` (number of bands) and ``sample(coords)``, which
        yields one array of band values per coordinate pair.
    rast_band_names : list of str
        Column names for the result, one per raster band.

    Returns
    -------
    pandas.DataFrame or None
        One row per point and one column per band. Returns None (after
        printing a message) when the number of names does not match the
        number of bands.
    """
    if rast_reader.count != len(rast_band_names):
        print('# band names != # bands in raster')
        return None

    # (x, y) pairs in the order of pts; this is the input rast_reader.sample expects
    sample_coords = [(p.x, p.y) for p in pts]
    if not sample_coords:
        # np.vstack raises on an empty sequence, so return an empty table instead
        return pd.DataFrame(columns=rast_band_names)

    # stack the per-point band arrays into a (n_points, n_bands) table
    samples = np.vstack(list(rast_reader.sample(sample_coords)))
    return pd.DataFrame(samples, columns=rast_band_names)

In [16]:
# Number of bands in the NAIP raster.
# NOTE(review): rast_reader_NAIP is only assigned in a later cell of this notebook,
# so this cell fails when the notebook is run top to bottom on a fresh kernel.
rast_reader_NAIP.count

4

In [11]:
# match crs of pts and raster and keep only the geometries (needed for rasterio.io.DatasetReader.sample())
# NOTE: this rebinds `pts` from the full table to just its geometry column, so the
# other columns are no longer reachable through `pts` after this cell runs.
pts = pts.to_crs(rast_reader_lidar.crs).geometry
type(pts)

geopandas.geoseries.GeoSeries

In [19]:
# Check the type of an individual element of pts.
type(pts[0])

shapely.geometry.point.Point

In [20]:
# Sample the lidar raster at every point; its single band is labelled 'canopy_height'.
sample_raster_from_pts(pts,rast_reader_lidar, ['canopy_height'])

pandas.core.frame.DataFrame

In [13]:
# Open NAIP scene
# Connect to the Planetary Computer STAC API.
URL = "https://planetarycomputer.microsoft.com/api/stac/v1"
catalog = pystac_client.Client.open(URL)

# Look the scene up by its item id in the "naip" collection.
itemid = 'ca_m_3411934_sw_11_060_20200521'
search = catalog.search(
    collections=["naip"],
    ids = itemid
)
# Take the first match; this raises IndexError if the search returns no items.
item = list(search.get_items())[0]
# Sign the asset href with planetary_computer before opening it with rasterio.
rast_reader_NAIP = rasterio.open(pc.sign(item.assets["image"].href))

<open DatasetReader name='https://naipeuwest.blob.core.windows.net/naip/v002/ca/2020/ca_060cm_2020/34119/m_3411934_sw_11_060_20200521.tif?st=2022-08-03T20%3A37%3A32Z&se=2022-08-11T20%3A37%3A32Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2022-08-04T20%3A37%3A31Z&ske=2022-08-11T20%3A37%3A31Z&sks=b&skv=2021-06-08&sig=UJw%2BZASu1FdyIL8bL69FBnsWKcQUjWqQRuAV15EVfIs%3D' mode='r'>

In [14]:
# TODO (original note: "CHANGE IN sample_raster_from_poly by THIS"): presumably
# sample_raster_from_poly should adopt the approach below — confirm.
# Reproject the points to the NAIP raster's CRS, then sample its four bands.
pts_naip = pts.to_crs(rast_reader_NAIP.crs).geometry
sample_raster_from_pts(pts_naip, rast_reader_NAIP, ['r','g','b','nir'])

Unnamed: 0,r,g,b,nir
0,88,97,74,154
1,90,90,70,156
2,77,85,68,168
3,90,91,70,160
4,82,89,70,162
...,...,...,...,...
25755,34,46,56,36
25756,74,84,79,107
25757,78,92,76,137
25758,52,62,67,71


## Checking that the rasters from lsf.save_min_max and sr.min_max_rasters are the same

In [4]:
import sample_rasters as sr
import lidar_sampling_functions as lsf

In [7]:
# Open the raster written by the sample_rasters (sr) workflow for comparison.
# NOTE(review): the lsf_fp / lsf_reader lines are commented out here, yet the cells
# below still use lsf_reader, so they fail on a fresh kernel unless it is defined elsewhere.
#lsf_fp =  os.path.join(os.getcwd(),'temp', 'lidar_2020_avgs.tif')
sr_fp = os.path.join(os.getcwd(),'temp','SB_canopy_height_2020_mins.tif')

#lsf_reader = rasterio.open(lsf_fp)
sr_reader = rasterio.open(sr_fp)

In [10]:
# Check the type of the raster's transform attribute.
type(sr_reader.transform)

affine.Affine

In [4]:
# DATA TYPES: compare the band dtypes of the two rasters.
# NOTE(review): this heading originally read "DIFFERENT DATA TYPES", but the recorded
# output shows ('float32',) for both readers.
print(lsf_reader.dtypes)
print(sr_reader.dtypes)

('float32',)
('float32',)


In [5]:
# SAME CRS: print the coordinate reference system of each raster to compare them.
print(lsf_reader.crs)
print(sr_reader.crs)

EPSG:32610
EPSG:32610


In [6]:
# DIFFERENT NO DATA VALUES
# Read band 1 of the lsf raster so its values can be compared by eye with the sr raster.
lsf_reader.read(1)

array([[ 2837.7778,   615.7778,   615.7778, ...,   615.7778,   615.7778,
         2837.7778],
       [  615.7778, -2717.2222, -2717.2222, ..., -2717.2222, -2717.2222,
          615.7778],
       [  615.7778, -2717.2222, -2717.2222, ..., -2717.2222, -2717.2222,
          615.7778],
       ...,
       [  615.7778, -2717.2222, -2717.2222, ..., -2717.2222, -2717.2222,
          615.7778],
       [  615.7778, -2717.2222, -2717.2222, ..., -2717.2222, -2717.2222,
          615.7778],
       [ 2837.7778,   615.7778,   615.7778, ...,   615.7778,   615.7778,
         2837.7778]], dtype=float32)

In [7]:
# Read band 1 of the sr raster for the same visual comparison.
sr_reader.read(1)

array([[ 2837.7778,   615.7778,   615.7778, ...,   615.7778,   615.7778,
         2837.7778],
       [  615.7778, -2717.2222, -2717.2222, ..., -2717.2222, -2717.2222,
          615.7778],
       [  615.7778, -2717.2222, -2717.2222, ..., -2717.2222, -2717.2222,
          615.7778],
       ...,
       [  615.7778, -2717.2222, -2717.2222, ..., -2717.2222, -2717.2222,
          615.7778],
       [  615.7778, -2717.2222, -2717.2222, ..., -2717.2222, -2717.2222,
          615.7778],
       [ 2837.7778,   615.7778,   615.7778, ...,   615.7778,   615.7778,
         2837.7778]], dtype=float32)

In [8]:
# Element-wise difference between band 1 of the two rasters (sr minus lsf).
diff = sr_reader.read(1) - lsf_reader.read(1)

In [9]:
# ALL VALUES ARE SAME EXCEPT FOR NO DATA VALUES (for max and min rasters)
# List the distinct difference values; a single 0 means the two bands are identical.
np.unique(diff)

array([0.], dtype=float32)

# Checking if the CSVs from the old and new LIDAR samples are the same

In [26]:
# Load the two point samples to compare: the "_SR_" file as new_lidar and the file
# without that suffix as old_lidar, both read from ./temp.
new_lidar = pd.read_csv(os.path.join(os.getcwd(),'temp','campus_lagoon_pts_spectral_lidar_SR_2020.csv'))
old_lidar = pd.read_csv(os.path.join(os.getcwd(),'temp','campus_lagoon_pts_spectral_lidar_2020.csv'))

In [27]:
# Row-wise difference in max_lidar (new minus old) and its distinct values.
# Note: `diff` is reused by each comparison cell below and overwritten every time.
diff = new_lidar.max_lidar - old_lidar.max_lidar
np.unique(diff.to_numpy())

array([0])

In [28]:
# Row-wise difference in min_lidar (new minus old) and its distinct values.
diff = new_lidar.min_lidar - old_lidar.min_lidar
np.unique(diff.to_numpy())

array([-10240,      0])

In [29]:
# Count the rows whose min_lidar difference (from the previous cell) equals -10240.
np.count_nonzero(diff ==  -10240)

467

In [33]:
# Row-wise difference in avg_lidar (new minus old) and its distinct values.
diff = new_lidar.avg_lidar - old_lidar.avg_lidar
np.unique(diff.to_numpy())

array([-3332.5557, -3332.2222, -3332.111 , -2221.4443, -1110.3334,
       -1110.2222, -1110.    ,     0.    ])

In [34]:
# Count the rows where the new avg_lidar is lower than the old one.
np.count_nonzero(diff <0)

463

In [35]:
# Row-wise difference in min_max_diff (new minus old) and its distinct values.
diff = new_lidar.min_max_diff - old_lidar.min_max_diff
np.unique(diff.to_numpy())

array([   0, 9984])

In [36]:
# Count the rows whose min_max_diff difference (from the previous cell) equals 9984.
np.count_nonzero(diff ==  9984)

467

It seems that some points have negative avg, min and max values; we will need to remove these from the training set.

In [2]:
# Scratch check: a parenthesised pair such as (3,3) is a tuple.
type((3,3))

tuple

In [15]:
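# TODO: decide whether negative averages should be clamped to zero; the snippet below is kept for reference.
#    negative_avg = avgs<0   # not sure whether this is used downstream,
#    avgs[negative_avg] = 0  # but it probably should not be here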
    