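Given a shapefile or csv of points along the SB coast, this notebook creates a dataframe with the following columns for each point:

- `lon`, `lat`: coordinates of the point, expressed in the CRS of the NAIP footprints file
- `naip_id`: id of the NAIP scene whose footprint contains the point (`'na'` if no footprint contains it)
- `aoi`: name of the area of interest associated with that scene (only when `only_aois` is True)

It uses the NAIP scenes' footprints to match each point to a scene.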

In [1]:
import os
import time
import pandas as pd
import numpy as np
import geopandas as gpd

import rasterio as rio
from rasterio.crs import CRS

from shapely.geometry import box

import sample_rasters as sr

In [2]:
# ---------------------------------------------------------------
# make dictionary with points that need to be sampled from each scene
def split_by_scene(pts, footprints):
    """Group the index labels of `pts` by the footprint that contains each point.

    Footprints are visited in the order their ids appear in `footprints.id`.
    A point is assigned to the first footprint whose geometry contains it and
    is not tested against later footprints, so every point is listed under at
    most one id.

    Parameters
    ----------
    pts : GeoDataFrame-like
        Table exposing `.index` and a `.geometry` column with one point per row.
    footprints : GeoDataFrame-like
        Table exposing an `id` column and a `.geometry` column whose values
        provide a `contains(point)` method.

    Returns
    -------
    dict
        Maps footprint id -> list of index labels of `pts` contained in that
        footprint. Ids whose footprint contains no point are omitted.
    """
    # When an id is repeated only its first geometry is used.
    first_geometry = {}
    for itemid, geometry in zip(footprints.id, footprints.geometry):
        first_geometry.setdefault(itemid, geometry)

    # Pair every label with its geometry up front so no positional lookup is
    # needed later; this keeps the result correct for any index, not only 0..n-1.
    unassigned = list(zip(pts.index, pts.geometry))
    pts_to_sample = {}

    for itemid, footprint in first_geometry.items():
        labels_inside = []
        still_unassigned = []
        for label, point in unassigned:
            if footprint.contains(point):
                labels_inside.append(label)
            else:
                still_unassigned.append((label, point))

        # points already matched are never tested against later footprints
        unassigned = still_unassigned
        if labels_inside:
            pts_to_sample[itemid] = labels_inside

    return pts_to_sample

In [3]:
# if points only come from aois, otherwise set to False
only_aois = True

# year of the NAIP footprints to match the points against
year = 2020

# -------------------------------------------
# save resulting dataframe
save = True

# -------------------------------------------
# filepath to points
fp_pts = '/home/jovyan/msai4earth-esa/iceplant_detection/models/modelAE5_FP_2020/false_positives/iceplant_false_positives_AE5_FP/'
file_name = 'modelAE5_FP_false_positives.shp'

# -------------------------------------------
# indicate if file is a csv or shapefile
# (the points file itself is read later, when the points are opened as a
# geodataframe; reading it here and discarding the result was wasted work)
csv = False
shp = True

In [4]:
# -------------------------------------------
# settings that are only needed when the points file is a csv
if csv:
    # CRS handed to the csv reader together with the coordinate columns
    crs = CRS.from_epsg(4326)

    # names of the longitude and latitude columns
    lon_label, lat_label = 'lon', 'lat'

In [5]:
# ---------------------------------------------------
# aoi information: NAIP scene id -> name of the area of interest it covers
aois = {
    'ca_m_3412037_nw_10_060_20200607': 'point_conception',
    'ca_m_3412039_nw_10_060_20200522': 'gaviota',
    'ca_m_3412040_ne_10_060_20200522': 'capitan',
    'ca_m_3411934_sw_11_060_20200521': 'campus_lagoon',
    'ca_m_3411936_se_11_060_20200521': 'carpinteria',
}

# scene ids in the same order as the mapping above
scene_ids = list(aois)

# ---------------------------------------------------
# load NAIP scenes' footprints, keeping only the requested year
# (and only the aoi scenes when only_aois is set)
fp = '/home/jovyan/msai4earth-esa/iceplant_detection/separating_naip_flights/naip_scenes_footprints/naip_scenes_footprints.shp'
footprints = gpd.read_file(fp)

# the year column is compared as a string
keep = footprints.year == str(year)
if only_aois:
    keep = keep & footprints.id.isin(scene_ids)
footprints = footprints[keep].reset_index(drop = True)

# ---------------------------------------------------
# open points as geodataframe
# (os.path.join keeps the path valid whether or not fp_pts ends with a
# separator, and matches how the output path is built when saving)
fp = os.path.join(fp_pts, file_name)
if csv:
    pts = sr.geodataframe_from_csv(fp = fp, 
                                   lon_label = lon_label, 
                                   lat_label = lat_label, 
                                   crs = crs)
elif shp:
    pts = gpd.read_file(fp)
else:
    # fail with a clear message instead of a NameError on `pts` below
    raise ValueError("Set either `csv` or `shp` to True to indicate the format of the points file.")

# points and footprints must share a CRS for the containment test
if pts.crs != footprints.crs:
    pts = pts.to_crs(footprints.crs)
# TODO: check that all geometries are valid
# use a clean 0..n-1 index for the points
pts = pts.reset_index(drop = True)

# ---------------------------------------------------
# find which points fall inside each scene footprint
pts_to_sample = split_by_scene(pts, footprints)

# 'na' marks points that are not inside any footprint
pts['naip_id'] = 'na'
if only_aois:
    pts['aoi'] = 'na'

# ---------------------------------------------------
# record the scene id (and aoi name) for the points of each scene
for key, indices in pts_to_sample.items():
    # was 'naip_od': the typo wrote the ids to a stray column and left
    # 'naip_id' as 'na' for every point
    pts.loc[indices, 'naip_id'] = key
    if only_aois:
        pts.loc[indices, 'aoi'] = aois[key]
    
# ---------------------------------------------------
# replace the geometry column with plain lon/lat columns and keep only the
# columns of interest ('aoi' is only present when only_aois is set)
columns = ['lon', 'lat', 'naip_id']
if only_aois:
    columns.append('aoi')

pts = (
    pts
    .assign(lon = pts.geometry.x, lat = pts.geometry.y)
    .drop(columns = 'geometry')
    [columns]
)

pts

Unnamed: 0,x,y,itemid,aoi
0,-120.487218,34.492960,ca_m_3412037_nw_10_060_20200607,point_conception
1,-120.482942,34.487021,ca_m_3412037_nw_10_060_20200607,point_conception
2,-120.470587,34.480261,ca_m_3412037_nw_10_060_20200607,point_conception
3,-120.470228,34.480481,ca_m_3412037_nw_10_060_20200607,point_conception
4,-120.474463,34.474333,ca_m_3412037_nw_10_060_20200607,point_conception
...,...,...,...,...
569,-119.499635,34.390906,ca_m_3411936_se_11_060_20200521,carpinteria
570,-119.504277,34.389914,ca_m_3411936_se_11_060_20200521,carpinteria
571,-119.504155,34.389866,ca_m_3411936_se_11_060_20200521,carpinteria
572,-119.503870,34.389431,ca_m_3411936_se_11_060_20200521,carpinteria


In [6]:
# write the table as a csv in the folder of the points file
# (the path is resolved against the working directory when fp_pts is relative)
if save:
    # a shapefile name gets its '.shp' replaced by '.csv'; other names are kept
    out_name = 'itemids_' + (file_name.replace('.shp','.csv') if shp else file_name)
    pts.to_csv(os.path.join(os.getcwd(), fp_pts, out_name), index=False)

In [7]:
# non-null entries per column for each area of interest
pts.groupby(by = 'aoi').count()

Unnamed: 0_level_0,x,y,itemid
aoi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
campus_lagoon,91,91,91
capitan,143,143,143
carpinteria,170,170,170
gaviota,115,115,115
point_conception,55,55,55
