In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import geopandas as gpd

import extracting_points_from_polygons as pp

In [2]:
def iceplant_proportions(labels):
    """Print the class balance of a collection of iceplant labels.

    Two things are printed, followed by a blank line:

    1. the non-iceplant : iceplant ratio, i.e. (count of label 0) / (count of
       label 1) rounded to one decimal; ``nan`` is shown when there are no
       iceplant (label 1) points, since the ratio is undefined;
    2. a table indexed by label value with the number of points in each class
       and its percentage of the total.

    Parameters
    ----------
    labels : pandas.Series or array-like
        One label per sampled point; 0 = non-iceplant, 1 = iceplant.

    Returns
    -------
    None
    """
    unique, counts = np.unique(labels, return_counts=True)

    # Look the counts up by label value rather than by position: when only one
    # class is present, counts[1] does not exist and counts[0] may belong to
    # either class.
    count_of = dict(zip(unique.tolist(), counts))
    n_nonice = count_of.get(0, 0)
    n_ice = count_of.get(1, 0)
    ratio = round(n_nonice / n_ice, 1) if n_ice else float('nan')
    print('no-iceplant:iceplant ratio    ', ratio, ':1')

    # One percentage per class actually present, so the column always has the
    # same length as `unique` and `counts`.
    n = len(labels)
    perc = [round(c / n * 100, 2) for c in counts]
    df = pd.DataFrame({'iceplant': unique,
                       'counts': counts,
                       'percentage': perc}).set_index('iceplant')
    print(df)
    print()

In [3]:
# Area of interest; passed to the pp path helpers below to locate this site's
# polygon files and the output point CSVs.
aoi = 'gaviota'

# Sample 2012

In [4]:
# Look up the path of the 2012 polygon file for the area of interest and read it.
fp = pp.path_to_polygons(aoi,2012)
polys12 = gpd.read_file(fp)

In [5]:
# Keep only the polygons labelled iceplant (iceplant == 1) and renumber rows from 0.
# reset_index is chained instead of called in place so the filtered slice of
# polys12 is never mutated.
polys_ice12 = polys12.loc[polys12.iceplant == 1].reset_index(drop=True)

# Sample points from the iceplant polygons, using the naip_id of the first row
# of polys12. 0.9 is presumably the proportion of points kept per polygon —
# confirm in extracting_points_from_polygons.
pts_ice12 = pp.naip_sample_proportion_no_warnings(polys_ice12, polys12.naip_id[0], 0.9)

# Total number of sampled points, then the number of points per polygon.
print(pts_ice12.shape[0])
pts_ice12.polygon_id.value_counts()

2248


3    487
4    336
5    255
9    227
7    206
0    191
2    147
6    145
8    142
1    112
Name: polygon_id, dtype: int64

In [6]:
# Keep only the polygons labelled non-iceplant (iceplant == 0) and renumber rows from 0.
# reset_index is chained instead of called in place so the filtered slice of
# polys12 is never mutated.
polys_nonice12 = polys12.loc[polys12.iceplant == 0].reset_index(drop=True)

# Sample points from the non-iceplant polygons, using the naip_id of the first
# row of polys12. Presumably 0.9 is the proportion sampled per polygon and 1500
# the per-polygon cap (the largest count in the output below is 1500) —
# confirm in extracting_points_from_polygons.
pts_nonice12 = pp.naip_sample_sliding_no_warnings(polys_nonice12, polys12.naip_id[0], 0.9, 1500)

# Total number of sampled points, then the number of points per polygon.
print(pts_nonice12.shape[0])
pts_nonice12.polygon_id.value_counts()

8722


17    1500
22    1143
21     884
20     662
16     640
11     639
19     578
10     530
12     519
13     442
18     431
15     381
14     373
Name: polygon_id, dtype: int64

In [7]:
# Stack the iceplant and non-iceplant samples into one 2012 table and print the class balance.
# NOTE(review): concat keeps each input's own row index, so index labels may
# repeat in pts12 — confirm nothing downstream relies on unique index labels.
pts12 = pd.concat([pts_ice12,pts_nonice12])
iceplant_proportions(pts12.iceplant)

no-iceplant:iceplant ratio     3.9 :1
          counts  percentage
iceplant                    
0           8722       79.51
1           2248       20.49



# Sample 2014

In [8]:
# Look up the path of the 2014 polygon file for the area of interest and read it.
fp = pp.path_to_polygons(aoi,2014)
polys14 = gpd.read_file(fp)

In [9]:
# Keep only the polygons labelled iceplant (iceplant == 1) and renumber rows from 0.
# reset_index is chained instead of called in place so the filtered slice of
# polys14 is never mutated.
polys_ice14 = polys14.loc[polys14.iceplant == 1].reset_index(drop=True)

# Sample points from the iceplant polygons, using the naip_id of the first row
# of polys14. 0.9 is presumably the proportion of points kept per polygon —
# confirm in extracting_points_from_polygons.
pts_ice14 = pp.naip_sample_proportion_no_warnings(polys_ice14, polys14.naip_id[0], 0.9)

# Total number of sampled points, then the number of points per polygon.
print(pts_ice14.shape[0])
pts_ice14.polygon_id.value_counts()

883


4    227
3    206
0    191
2    147
1    112
Name: polygon_id, dtype: int64

In [10]:
# Keep only the polygons labelled non-iceplant (iceplant == 0) and renumber rows from 0.
# reset_index is chained instead of called in place so the filtered slice of
# polys14 is never mutated.
polys_nonice14 = polys14.loc[polys14.iceplant == 0].reset_index(drop=True)

# Sample points from the non-iceplant polygons, using the naip_id of the first
# row of polys14. Presumably 0.8 is the proportion sampled per polygon and 1500
# the per-polygon cap — confirm in extracting_points_from_polygons.
# NOTE(review): the iceplant sample for this year uses 0.9, not 0.8 — confirm
# the difference is intentional.
pts_nonice14 = pp.naip_sample_sliding_no_warnings(polys_nonice14, polys14.naip_id[0], 0.8, 1500)

# Total number of sampled points, then the number of points per polygon.
print(pts_nonice14.shape[0])
pts_nonice14.polygon_id.value_counts()

8397


10    1464
15    1016
16     851
14     786
17     701
13     588
9      569
12     514
5      461
6      393
11     383
8      339
7      332
Name: polygon_id, dtype: int64

In [11]:
# Stack the iceplant and non-iceplant samples into one 2014 table and print the class balance.
# NOTE(review): concat keeps each input's own row index, so index labels may
# repeat in pts14 — confirm nothing downstream relies on unique index labels.
pts14 = pd.concat([pts_ice14,pts_nonice14])
iceplant_proportions(pts14.iceplant)

no-iceplant:iceplant ratio     9.5 :1
          counts  percentage
iceplant                    
0           8397       90.48
1            883        9.52



# Sample 2018
1. Iceplant: sample 80% of the points in each polygon.
2. Non-iceplant: sample 80% of the points in each polygon, up to a maximum of 1500 points (pixels) per polygon.

In [12]:
# Look up the path of the 2018 polygon file for the area of interest and read it.
fp = pp.path_to_polygons(aoi,2018)
polys18 = gpd.read_file(fp)

In [13]:
# Keep only the polygons labelled iceplant (iceplant == 1) and renumber rows from 0.
# reset_index is chained instead of called in place so the filtered slice of
# polys18 is never mutated.
polys_ice18 = polys18.loc[polys18.iceplant == 1].reset_index(drop=True)

# Sample points from the iceplant polygons, using the naip_id of the first row
# of polys18. 0.8 is presumably the proportion of points kept per polygon —
# confirm in extracting_points_from_polygons.
pts_ice18 = pp.naip_sample_proportion_no_warnings(polys_ice18, polys18.naip_id[0], 0.8)

# Total number of sampled points, then the number of points per polygon.
print(pts_ice18.shape[0])
pts_ice18.polygon_id.value_counts()

2566


7    562
5    509
0    472
3    281
2    229
1    208
4    165
6    140
Name: polygon_id, dtype: int64

In [14]:
# Keep only the polygons labelled non-iceplant (iceplant == 0) and renumber rows from 0.
# reset_index is chained instead of called in place so the filtered slice of
# polys18 is never mutated.
polys_nonice18 = polys18.loc[polys18.iceplant == 0].reset_index(drop=True)

# Sample points from the non-iceplant polygons, using the naip_id of the first
# row of polys18. Presumably 0.8 is the proportion sampled per polygon and 1500
# the per-polygon cap (several polygons in the output below sit at exactly
# 1500) — confirm in extracting_points_from_polygons.
pts_nonice18 = pp.naip_sample_sliding_no_warnings(polys_nonice18, polys18.naip_id[0], 0.8, 1500)

# Total number of sampled points, then the number of points per polygon.
print(pts_nonice18.shape[0])
pts_nonice18.polygon_id.value_counts()

15543


8     1500
15    1500
19    1500
18    1500
14    1500
17    1429
9     1310
10    1283
11    1093
16    1064
13     942
12     922
Name: polygon_id, dtype: int64

In [15]:
# Stack the iceplant and non-iceplant samples into one 2018 table and print the class balance.
# NOTE(review): concat keeps each input's own row index, so index labels may
# repeat in pts18 — confirm nothing downstream relies on unique index labels.
pts18 = pd.concat([pts_ice18,pts_nonice18])
iceplant_proportions(pts18.iceplant)

no-iceplant:iceplant ratio     6.1 :1
          counts  percentage
iceplant                    
0          15543       85.83
1           2566       14.17



# Sample 2020

In [16]:
# Look up the path of the 2020 polygon file for the area of interest and read it.
fp = pp.path_to_polygons(aoi,2020)
polys20 = gpd.read_file(fp)

In [17]:
# Keep only the polygons labelled iceplant (iceplant == 1) and renumber rows from 0.
# reset_index is chained instead of called in place so the filtered slice of
# polys20 is never mutated.
polys_ice20 = polys20.loc[polys20.iceplant == 1].reset_index(drop=True)

# Sample points from the iceplant polygons, using the naip_id of the first row
# of polys20. 0.7 is presumably the proportion of points kept per polygon —
# confirm in extracting_points_from_polygons.
# NOTE(review): the other years use 0.8 or 0.9 here — confirm 0.7 is intentional.
pts_ice20 = pp.naip_sample_proportion_no_warnings(polys_ice20, polys20.naip_id[0], 0.7)

# Total number of sampled points, then the number of points per polygon.
print(pts_ice20.shape[0])
pts_ice20.polygon_id.value_counts()

2578


7     492
5     445
0     413
20    332
3     246
2     201
1     182
4     144
6     123
Name: polygon_id, dtype: int64

In [18]:
# Keep only the polygons labelled non-iceplant (iceplant == 0) and renumber rows from 0.
# reset_index is chained instead of called in place so the filtered slice of
# polys20 is never mutated.
polys_nonice20 = polys20.loc[polys20.iceplant == 0].reset_index(drop=True)

# Sample points from the non-iceplant polygons, using the naip_id of the first
# row of polys20. Presumably 0.9 is the proportion sampled per polygon and 1500
# the per-polygon cap (several polygons in the output below sit at exactly
# 1500) — confirm in extracting_points_from_polygons.
pts_nonice20 = pp.naip_sample_sliding_no_warnings(polys_nonice20, polys20.naip_id[0], 0.9, 1500)

# Total number of sampled points, then the number of points per polygon.
print(pts_nonice20.shape[0])
pts_nonice20.polygon_id.value_counts()

16441


8     1500
15    1500
19    1500
18    1500
14    1500
17    1500
9     1474
10    1443
11    1230
16    1197
13    1060
12    1037
Name: polygon_id, dtype: int64

In [19]:
# Stack the iceplant and non-iceplant samples into one 2020 table and print the class balance.
# NOTE(review): concat keeps each input's own row index, so index labels may
# repeat in pts20 — confirm nothing downstream relies on unique index labels.
pts20 = pd.concat([pts_ice20,pts_nonice20])
iceplant_proportions(pts20.iceplant)

no-iceplant:iceplant ratio     6.4 :1
          counts  percentage
iceplant                    
0          16441       86.45
1           2578       13.55



# Save samples

In [21]:
# Write each year's combined sample to the CSV path that pp provides for this
# area of interest and year.
years = [2012, 2014, 2018, 2020]
pts = [pts12, pts14, pts18, pts20]
for year, sample in zip(years, pts):
    fp = pp.path_points_csv(aoi, year)
    sample.to_csv(fp)