In [1]:
import pandas as pd
import os
import numpy as np
import geopandas as gpd

from shapely.geometry import Point
from rasterio import CRS

In [2]:
# Columns retained from every CSV loaded in this notebook, in output order.
keepers = [
    'x', 'y', 'pts_crs',
    'aoi', 'naip_id',
    'r', 'g', 'b', 'nir', 'ndvi',
    'year', 'month', 'day_in_year',
    'iceplant',
]

In [3]:
# Load the train and test points of the previous model, keep only the
# `keepers` columns, and tag every row with the split it came from.
prev_train_csv = '/home/jovyan/msai4earth-esa/iceplant_detection/models/modelAE5_FP_2020/modelAE5_FP_2020_train.csv'
prev_test_csv = '/home/jovyan/msai4earth-esa/iceplant_detection/models/modelAE5_FP_2020/modelAE5_FP_2020_test.csv'

prev_train = pd.read_csv(prev_train_csv)[keepers].assign(aux='train')
prev_test = pd.read_csv(prev_test_csv)[keepers].assign(aux='test')

# ignore_index=True renumbers the stacked rows 0..n-1 in one step.
all_prev = pd.concat([prev_train, prev_test], ignore_index=True)
all_prev

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,year,month,day_in_year,iceplant,aux
0,-119.868370,34.417604,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,61,87,69,184,0.502041,2020,5,142,0.0,train
1,-119.869194,34.414761,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,116,120,93,174,0.200000,2020,5,142,0.0,train
2,-119.856764,34.410684,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,115,120,93,170,0.192982,2020,5,142,0.0,train
3,-119.868120,34.417642,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,59,87,69,190,0.526104,2020,5,142,0.0,train
4,-119.863904,34.413559,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,110,118,89,179,0.238754,2020,5,142,0.0,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2624,-120.437732,34.455862,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,101,99,73,150,0.195219,2020,6,159,0.0,test
2625,-120.443079,34.455301,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,121,97,76,157,0.129496,2020,6,159,0.0,test
2626,-120.445337,34.456349,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,136,130,99,177,0.130990,2020,6,159,0.0,test
2627,-120.445716,34.455899,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,126,126,83,177,0.168317,2020,6,159,0.0,test


In [4]:
# Sanity check: list the distinct CRS strings recorded for the points;
# a single value means they all share the same CRS.
print(all_prev['pts_crs'].unique())

['EPSG:4326']


In [5]:
# Flag the points that were marked as iceplant from an incorrect Gaviota polygon.
# NOTE(review): the original note says "id = 3", but the polygon is picked by
# position 16 in the shapefile — confirm these refer to the same polygon.
gaviota_polys = gpd.read_file('/home/jovyan/msai4earth-esa/iceplant_detection/A_data_sampling_workflow/polygons_from_naip_images/gaviota_polygons/gaviota_polygons_2020/gaviota_polygons_2020.shp')
bad_poly = gaviota_polys.iloc[16].geometry

# all_prev was re-indexed 0..n-1 when it was built, so the running position of
# each row is also its index label.
bad_indices = [
    row
    for row, (px, py) in enumerate(zip(all_prev.x, all_prev.y))
    if bad_poly.contains(Point([px, py]))
]

# Show the flagged rows for inspection.
all_prev.loc[bad_indices]

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,year,month,day_in_year,iceplant,aux
539,-120.216331,34.47313,EPSG:4326,gaviota,ca_m_3412039_nw_10_060_20200522,116,131,116,163,0.168459,2020,5,143,1.0,train
543,-120.216353,34.473124,EPSG:4326,gaviota,ca_m_3412039_nw_10_060_20200522,114,125,115,150,0.136364,2020,5,143,1.0,train
545,-120.216348,34.473065,EPSG:4326,gaviota,ca_m_3412039_nw_10_060_20200522,119,133,120,171,0.17931,2020,5,143,1.0,train
553,-120.216354,34.473025,EPSG:4326,gaviota,ca_m_3412039_nw_10_060_20200522,80,98,83,152,0.310345,2020,5,143,1.0,train
615,-120.216358,34.473098,EPSG:4326,gaviota,ca_m_3412039_nw_10_060_20200522,108,122,112,150,0.162791,2020,5,143,1.0,train
628,-120.216355,34.473086,EPSG:4326,gaviota,ca_m_3412039_nw_10_060_20200522,117,132,118,171,0.1875,2020,5,143,1.0,train
1999,-120.216375,34.473069,EPSG:4326,gaviota,ca_m_3412039_nw_10_060_20200522,110,126,116,154,0.166667,2020,5,143,1.0,test
2000,-120.216342,34.473123,EPSG:4326,gaviota,ca_m_3412039_nw_10_060_20200522,116,128,116,157,0.150183,2020,5,143,1.0,test
2003,-120.216329,34.473072,EPSG:4326,gaviota,ca_m_3412039_nw_10_060_20200522,120,133,121,171,0.175258,2020,5,143,1.0,test
2043,-120.216361,34.473096,EPSG:4326,gaviota,ca_m_3412039_nw_10_060_20200522,113,126,114,157,0.162963,2020,5,143,1.0,test


In [6]:
# remove points outside 50m coastal buffer
# The buffer shapefile is reprojected to EPSG:4326 so it can be compared with
# the point coordinates, then its first geometry is used as the buffer.
coast_shp = gpd.read_file('/home/jovyan/msai4earth-esa/iceplant_detection/separating_naip_flights/SB_coastal_buffer/SB_coastal_buffer.shp')
coast_shp = coast_shp.to_crs(CRS.from_epsg(4326))
coast = coast_shp.geometry[0]

# Index labels of every point that is NOT inside the coastal buffer.
outside_coast = [
    idx
    for idx, px, py in zip(all_prev.index, all_prev.x, all_prev.y)
    if not coast.contains(Point([px, py]))
]

# Merge with the previously flagged rows through a set union instead of
# appending in place: a point flagged by both filters is counted once, and
# re-running this cell no longer grows bad_indices with duplicates.
bad_indices = sorted(set(bad_indices) | set(outside_coast))

# Number of distinct rows that will be dropped.
len(bad_indices)

180

In [7]:
# Discard every flagged row (index labels collected in bad_indices) and keep
# the remainder of the previous points.
good_prev = all_prev.drop(index=bad_indices)
good_prev

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,year,month,day_in_year,iceplant,aux
0,-119.868370,34.417604,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,61,87,69,184,0.502041,2020,5,142,0.0,train
1,-119.869194,34.414761,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,116,120,93,174,0.200000,2020,5,142,0.0,train
2,-119.856764,34.410684,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,115,120,93,170,0.192982,2020,5,142,0.0,train
3,-119.868120,34.417642,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,59,87,69,190,0.526104,2020,5,142,0.0,train
4,-119.863904,34.413559,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,110,118,89,179,0.238754,2020,5,142,0.0,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2624,-120.437732,34.455862,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,101,99,73,150,0.195219,2020,6,159,0.0,test
2625,-120.443079,34.455301,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,121,97,76,157,0.129496,2020,6,159,0.0,test
2626,-120.445337,34.456349,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,136,130,99,177,0.130990,2020,6,159,0.0,test
2627,-120.445716,34.455899,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,126,126,83,177,0.168317,2020,6,159,0.0,test


In [8]:
# Separate the cleaned points back into their original train/test sets; the
# helper 'aux' column is dropped once it has served as the selector.
good_old_train = good_prev.loc[good_prev['aux'] == 'train'].drop(columns=['aux'])
good_old_test = good_prev.loc[good_prev['aux'] == 'test'].drop(columns=['aux'])

In [9]:
# Load the false-positive and false-negative points, restricted to the
# `keepers` columns.
fp_csv = '/home/jovyan/msai4earth-esa/iceplant_detection/models/modelAE5_FP_2020/false_positives/glcm_features_false_positives_AE5_FP.csv'
fn_csv = '/home/jovyan/msai4earth-esa/iceplant_detection/models/modelAE5_FP_2020/false_negatives/glcm_features_false_negatives_AE5_FP.csv'

FP = pd.read_csv(fp_csv)[keepers]
FN = pd.read_csv(fn_csv)[keepers]

# Report the number of points in each file (FP first, then FN).
for pts in (FP, FN):
    print(len(pts))

574
512


In [10]:
# Stack false positives on top of false negatives under a fresh 0..n-1 index.
all_new = pd.concat([FP, FN], ignore_index=True)
all_new

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,year,month,day_in_year,iceplant
0,-120.487218,34.492960,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,127,98,83,150,0.083032,2020,6,159,0
1,-120.482942,34.487021,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,141,110,87,158,0.056856,2020,6,159,0
2,-120.470587,34.480261,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,87,90,53,175,0.335878,2020,6,159,0
3,-120.470228,34.480481,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,121,116,88,169,0.165517,2020,6,159,0
4,-120.474463,34.474333,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,86,90,62,169,0.325490,2020,6,159,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1081,-119.519484,34.390461,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,102,105,86,164,0.233083,2020,5,142,1
1082,-119.520288,34.391000,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,92,105,73,177,0.315985,2020,5,142,1
1083,-119.520538,34.390967,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,108,109,83,161,0.197026,2020,5,142,1
1084,-119.520184,34.390843,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,82,97,74,177,0.366795,2020,5,142,1


In [11]:
# Non-null row counts of the new points per (aoi, iceplant) combination.
all_new.groupby(by=['aoi', 'iceplant']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y,pts_crs,naip_id,r,g,b,nir,ndvi,year,month,day_in_year
aoi,iceplant,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
campus_lagoon,0,91,91,91,91,91,91,91,91,91,91,91,91
campus_lagoon,1,180,180,180,180,180,180,180,180,180,180,180,180
capitan,0,143,143,143,143,143,143,143,143,143,143,143,143
capitan,1,139,139,139,139,139,139,139,139,139,139,139,139
carpinteria,0,170,170,170,170,170,170,170,170,170,170,170,170
carpinteria,1,134,134,134,134,134,134,134,134,134,134,134,134
gaviota,0,115,115,115,115,115,115,115,115,115,115,115,115
gaviota,1,14,14,14,14,14,14,14,14,14,14,14,14
point_conception,0,55,55,55,55,55,55,55,55,55,55,55,55
point_conception,1,45,45,45,45,45,45,45,45,45,45,45,45


In [12]:
# training: sample 70% of iceplant and vegetation pts from each aoi
# A fixed seed makes the split reproducible under Restart & Run All;
# groupby(...).sample draws the fraction within every (aoi, iceplant) group
# and returns the selected rows with their original index labels and columns.
SPLIT_SEED = 42
xtr_train = all_new.groupby(['aoi', 'iceplant']).sample(frac=0.7, random_state=SPLIT_SEED)

# test: every row not drawn for training, kept in its original row order
xtr_test = all_new.drop(index=xtr_train.index)

In [13]:
# Non-null row counts of the sampled training points per (aoi, iceplant).
xtr_train.groupby(by=['aoi', 'iceplant']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y,pts_crs,naip_id,r,g,b,nir,ndvi,year,month,day_in_year
aoi,iceplant,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
campus_lagoon,0,64,64,64,64,64,64,64,64,64,64,64,64
campus_lagoon,1,126,126,126,126,126,126,126,126,126,126,126,126
capitan,0,100,100,100,100,100,100,100,100,100,100,100,100
capitan,1,97,97,97,97,97,97,97,97,97,97,97,97
carpinteria,0,119,119,119,119,119,119,119,119,119,119,119,119
carpinteria,1,94,94,94,94,94,94,94,94,94,94,94,94
gaviota,0,80,80,80,80,80,80,80,80,80,80,80,80
gaviota,1,10,10,10,10,10,10,10,10,10,10,10,10
point_conception,0,38,38,38,38,38,38,38,38,38,38,38,38
point_conception,1,31,31,31,31,31,31,31,31,31,31,31,31


In [14]:
# Non-null row counts of the held-out test points per (aoi, iceplant).
xtr_test.groupby(by=['aoi', 'iceplant']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y,pts_crs,naip_id,r,g,b,nir,ndvi,year,month,day_in_year
aoi,iceplant,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
campus_lagoon,0,27,27,27,27,27,27,27,27,27,27,27,27
campus_lagoon,1,54,54,54,54,54,54,54,54,54,54,54,54
capitan,0,43,43,43,43,43,43,43,43,43,43,43,43
capitan,1,42,42,42,42,42,42,42,42,42,42,42,42
carpinteria,0,51,51,51,51,51,51,51,51,51,51,51,51
carpinteria,1,40,40,40,40,40,40,40,40,40,40,40,40
gaviota,0,35,35,35,35,35,35,35,35,35,35,35,35
gaviota,1,4,4,4,4,4,4,4,4,4,4,4,4
point_conception,0,17,17,17,17,17,17,17,17,17,17,17,17
point_conception,1,14,14,14,14,14,14,14,14,14,14,14,14


In [15]:
# Assemble the final sets: newly sampled points first, followed by the
# cleaned points of the previous model, renumbered 0..n-1.
train = pd.concat([xtr_train, good_old_train], ignore_index=True)
test = pd.concat([xtr_test, good_old_test], ignore_index=True)

In [16]:
# Display the assembled training set for a visual check.
train

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,year,month,day_in_year,iceplant
0,-119.851632,34.411630,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,83,76,68,185,0.380597,2020,5,142,0.0
1,-119.843182,34.413305,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,77,105,70,182,0.405405,2020,5,142,0.0
2,-119.865369,34.415011,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,107,102,84,155,0.183206,2020,5,142,0.0
3,-119.845463,34.414732,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,81,108,78,185,0.390977,2020,5,142,0.0
4,-119.851264,34.415973,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,64,94,74,182,0.479675,2020,5,142,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2468,-120.451944,34.456799,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,53,55,47,131,0.423913,2020,6,159,0.0
2469,-120.438128,34.458685,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,92,87,74,84,-0.045455,2020,6,159,0.0
2470,-120.484881,34.497110,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,97,92,78,110,0.062802,2020,6,159,0.0
2471,-120.485111,34.493451,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,109,87,68,140,0.124498,2020,6,159,0.0


In [17]:
# Display the assembled test set for a visual check.
test

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,year,month,day_in_year,iceplant
0,-120.487218,34.492960,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,127,98,83,150,0.083032,2020,6,159,0.0
1,-120.470228,34.480481,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,121,116,88,169,0.165517,2020,6,159,0.0
2,-120.474463,34.474333,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,86,90,62,169,0.325490,2020,6,159,0.0
3,-120.473063,34.472981,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,117,97,83,140,0.089494,2020,6,159,0.0
4,-120.472993,34.473028,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,98,96,66,161,0.243243,2020,6,159,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1057,-120.437732,34.455862,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,101,99,73,150,0.195219,2020,6,159,0.0
1058,-120.443079,34.455301,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,121,97,76,157,0.129496,2020,6,159,0.0
1059,-120.445337,34.456349,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,136,130,99,177,0.130990,2020,6,159,0.0
1060,-120.445716,34.455899,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,126,126,83,177,0.168317,2020,6,159,0.0


In [18]:
# Write the training points to a CSV (path relative to the working directory),
# leaving out the row index.
train.to_csv(path_or_buf='model_feb14_train_coords.csv', index=False)

In [19]:
# Write the test points to a CSV (path relative to the working directory),
# leaving out the row index.
test.to_csv(path_or_buf='model_feb14_test_coords.csv', index=False)

In [20]:
# Non-null row counts of the final training set per (aoi, iceplant).
train.groupby(by=['aoi', 'iceplant']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y,pts_crs,naip_id,r,g,b,nir,ndvi,year,month,day_in_year
aoi,iceplant,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
campus_lagoon,0.0,329,329,329,329,329,329,329,329,329,329,329,329
campus_lagoon,1.0,198,198,198,198,198,198,198,198,198,198,198,198
capitan,0.0,341,341,341,341,341,341,341,341,341,341,341,341
capitan,1.0,207,207,207,207,207,207,207,207,207,207,207,207
carpinteria,0.0,396,396,396,396,396,396,396,396,396,396,396,396
carpinteria,1.0,192,192,192,192,192,192,192,192,192,192,192,192
gaviota,0.0,347,347,347,347,347,347,347,347,347,347,347,347
gaviota,1.0,103,103,103,103,103,103,103,103,103,103,103,103
point_conception,0.0,196,196,196,196,196,196,196,196,196,196,196,196
point_conception,1.0,164,164,164,164,164,164,164,164,164,164,164,164


In [21]:
# Non-null row counts of the final test set per (aoi, iceplant).
test.groupby(by=['aoi', 'iceplant']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y,pts_crs,naip_id,r,g,b,nir,ndvi,year,month,day_in_year
aoi,iceplant,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
campus_lagoon,0.0,140,140,140,140,140,140,140,140,140,140,140,140
campus_lagoon,1.0,84,84,84,84,84,84,84,84,84,84,84,84
capitan,0.0,145,145,145,145,145,145,145,145,145,145,145,145
capitan,1.0,89,89,89,89,89,89,89,89,89,89,89,89
carpinteria,0.0,170,170,170,170,170,170,170,170,170,170,170,170
carpinteria,1.0,82,82,82,82,82,82,82,82,82,82,82,82
gaviota,0.0,151,151,151,151,151,151,151,151,151,151,151,151
gaviota,1.0,43,43,43,43,43,43,43,43,43,43,43,43
point_conception,0.0,88,88,88,88,88,88,88,88,88,88,88,88
point_conception,1.0,70,70,70,70,70,70,70,70,70,70,70,70


In [22]:
# Overall class balance: counts per iceplant label over train and test combined.
combined = pd.concat([train, test])
combined.groupby(['iceplant']).count()

Unnamed: 0_level_0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,year,month,day_in_year
iceplant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0.0,2303,2303,2303,2303,2303,2303,2303,2303,2303,2303,2303,2303,2303
1.0,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232
