This notebook can be used to add R, G, B, NIR, NDVI and date features to a CSV that has the following information at every point:

x-coordinate (longitude), y-coordinate (latitude), itemid.

Iceplant classification can be added from a column present in the dataset, set as a constant value, or not included.

In particular, this can be used to add features to files coming from Collect Earth Online classifications.

All points must come from a single year and share the same coordinate reference system (CRS).

In [1]:
import os
import pandas as pd
import geopandas as gpd
import planetary_computer as pc
import rioxarray as rioxr

import rasterio as rio
from rasterio.crs import CRS

from shapely.geometry import box

import sample_rasters as sr

In [2]:
# Location of the csv (or shp) whose points will receive spectral info.
# `folder` keeps its trailing slash, so the full path is a plain concatenation.
folder = '/home/jovyan/msai4earth-esa/iceplant_detection/models/model_2k/twok_dataset/'
file_name = 'extra_pts_conc_cap.csv'
fp = f'{folder}{file_name}'

# preview the first rows of the input file
pd.read_csv(fp).head()

Unnamed: 0,lon,lat,pl_scene,iceplant
0,-120.021107,34.470154,ca_m_3412040_ne_10_060_20200522,0.0
1,-120.033436,34.470882,ca_m_3412040_ne_10_060_20200522,0.0
2,-120.061352,34.470356,ca_m_3412040_ne_10_060_20200522,0.0
3,-120.038247,34.463719,ca_m_3412040_ne_10_060_20200522,0.0
4,-120.044848,34.470049,ca_m_3412040_ne_10_060_20200522,0.0


In [3]:
# ===========================================
# INPUT FORMAT
# True when the input file is a csv, False when it is a shapefile
csv = True

# --- only relevant when the input is a csv ---
# coordinate reference system of the point coordinates
crs = CRS.from_epsg(4326)

# names of the coordinate columns
lon_label = 'lon'  # longitude = x
lat_label = 'lat'  # latitude = y

# ===========================================
# NAIP SCENE
# column holding the itemid of the NAIP scene that contains each point (if any)
itemid_col = 'pl_scene'

# ===========================================
# ICEPLANT LABEL
# how to build the iceplant column; one of:
#   'ignore'          -> no iceplant column in the output
#   'included'        -> copy it from the column named in iceplant_col
#   'set_to_constant' -> fill every point with iceplant_val
iceplant_param = 'included'

# used when iceplant_param = 'included':
# column telling whether a point is iceplant (1) or other vegetation (0)
iceplant_col = 'iceplant'

# used when iceplant_param = 'set_to_constant':
# value assigned to all points
iceplant_val = 0

# ===========================================
# AREA OF INTEREST
# whether to add a column with the name of the area of interest
add_aois = True

# ===========================================
# YEAR
# all pts need to come from same year: used to look for NAIP scenes
year = 2020

# ===========================================
# OUTPUT
# whether to write the points with added features to a csv, and its name
# (saved in folder)
save = True
csv_name = f'rgbnir_{file_name}'

In [4]:
# ---------------------------------------------------
# Sample R, G, B, NIR from the NAIP scene of every point, then add date,
# coordinates, NDVI, (optionally) area-of-interest and iceplant columns,
# and (optionally) save the result as a csv.

# ---------------------------------------------------
# validate the configuration before any raster is sampled; otherwise a typo in
# iceplant_param only surfaces as a KeyError at the final column selection
if iceplant_param not in ('ignore', 'included', 'set_to_constant'):
    raise ValueError("iceplant_param must be one of 'ignore', 'included', "
                     "'set_to_constant'; got " + repr(iceplant_param))

# ---------------------------------------------------
# open validation points as geodataframe
# NOTE(review): the `csv` flag from the configuration cell is not consulted
# here; the file is always read with geodataframe_from_csv -- confirm how
# shapefiles are meant to be loaded.
pts = sr.geodataframe_from_csv(fp = fp,
                               lon_label = lon_label,
                               lat_label = lat_label,
                               crs = crs)

# fail before sampling if the configured label column is not in the data
if iceplant_param == 'included' and iceplant_col not in pts.columns:
    raise KeyError(iceplant_col)

scene_ids = list(pts[itemid_col].unique())

# ---------------------------------------------------
# itemids for each aoi (if needed)
D = { 'ca_m_3412037_nw_10_060_20200607' : 'point_conception',
      'ca_m_3412039_nw_10_060_20200522' : 'gaviota',
      'ca_m_3412040_ne_10_060_20200522' : 'capitan',
      'ca_m_3411934_sw_11_060_20200521' : 'campus_lagoon' ,
      'ca_m_3411936_se_11_060_20200521' : 'carpinteria'}

# ---------------------------------------------------
# names given to the sampled raster bands (same for every scene)
band_names = ['r', 'g', 'b', 'nir']

sampled_points = []
N = len(scene_ids)  # counter to finish

for itemid in scene_ids:

    to_sample = pts[pts[itemid_col] == itemid].geometry

    # an id can select no rows (e.g. a missing itemid: NaN never equals itself)
    if len(to_sample) > 0:
        # ---------------------------------------------------
        # sample spectral bands from NAIP
        item = sr.get_item_from_id(itemid)
        scene_rast_r = sr.get_raster_from_item(item)

        # re-index the sampled bands so they line up with the points' index
        spectral_bands = sr.sample_raster_from_pts(to_sample, scene_rast_r, band_names).set_index(to_sample.index)

        # ---------------------------------------------------
        # concatenate sampled data and add date and naipid information
        df = pd.concat([to_sample, spectral_bands], axis=1).assign(
            year = item.datetime.year,
            month = item.datetime.month,
            day_in_year = sr.day_in_year(item.datetime.day, item.datetime.month, item.datetime.year),
            naip_id = itemid)
        # ---------------------------------------------------
        sampled_points.append(gpd.GeoDataFrame(df))

    # ---------------------------------------
    # processing message
    N = N-1
    print('REMAINING: ', N, 'scenes', end="\r")

# pd.concat raises an uninformative "No objects to concatenate" on an empty list
if not sampled_points:
    raise ValueError("no points were sampled: check the values in column " + repr(itemid_col))

# sort_index restores the row order of the input points
samples = pd.concat(sampled_points).sort_index()

# ---------------------------------------
# create iceplant column
if iceplant_param == 'set_to_constant':
    samples['iceplant'] = iceplant_val
elif iceplant_param == 'included':
    # use the configured column name (assignment aligns on the shared index)
    samples['iceplant'] = pts[iceplant_col]


# ---------------------------------------
def _ndvi(df):
    """Return (nir - r) / (nir + r) computed on int16 copies of the bands."""
    # presumably the bands are unsigned, so they are cast to a signed type
    # before subtracting -- TODO confirm the raster dtype
    nir = df.nir.astype('int16')
    red = df.r.astype('int16')
    return (nir - red) / (nir + red)


# add coordinates, crs and ndvi columns
samples = samples.assign( x = lambda df : df.geometry.x,
                          y = lambda df : df.geometry.y,
                          pts_crs = crs.to_string(),
                          ndvi = _ndvi)

# ---------------------------------------
# name of the area of interest of each scene; scenes not listed in D get NaN
if add_aois:
    samples['aoi'] = samples.naip_id.map(D)

# ---------------------------------------
# select and order the output columns
features = ['x', 'y', 'pts_crs','naip_id','aoi',
            'r', 'g', 'b', 'nir', 'ndvi',
            'year', 'month', 'day_in_year',
            'iceplant']

if iceplant_param == 'ignore':
    features.remove('iceplant')
if not add_aois:
    features.remove('aoi')

samples = samples[features]
# ---------------------------------------

if save:
    samples.to_csv(os.path.join(os.getcwd(), folder, csv_name), index=False)

REMAINING:  0 scenes

In [5]:
# number of sampled points per area of interest and iceplant class
samples.groupby(by=['aoi', 'iceplant']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y,pts_crs,naip_id,r,g,b,nir,ndvi,year,month,day_in_year
aoi,iceplant,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
capitan,0.0,22,22,22,22,22,22,22,22,22,22,22,22
point_conception,0.0,51,51,51,51,51,51,51,51,51,51,51,51
point_conception,1.0,61,61,61,61,61,61,61,61,61,61,61,61
