In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import geopandas as gpd

import extracting_points_from_polygons as pp

In [2]:
def iceplant_proportions(labels):
    """Print the class balance of a set of iceplant labels.

    Prints the no-iceplant:iceplant ratio, followed by a table holding the
    count and percentage of every distinct label value, and a blank line.

    Parameters
    ----------
    labels : array-like (e.g. pandas.Series or numpy.ndarray)
        One label per sampled point; 0 = no iceplant, 1 = iceplant.

    Returns
    -------
    None
        The summary is only printed.
    """
    values = np.asarray(labels)
    unique, counts = np.unique(values, return_counts=True)
    n = values.shape[0]

    # Look the two classes up by label value instead of by position, so a
    # sample that is missing one of the classes does not raise IndexError.
    n_nonice = counts[unique == 0].sum()
    n_ice = counts[unique == 1].sum()
    if n_ice > 0:
        print('no-iceplant:iceplant ratio    ', round(n_nonice / n_ice, 1), ':1')
    else:
        # Without iceplant points the ratio would be a division by zero.
        print('no-iceplant:iceplant ratio     undefined (no iceplant points)')

    # One percentage per class actually present (not a hard-coded pair).
    perc = [round(c / n * 100, 2) for c in counts]
    df = pd.DataFrame({'iceplant': unique,
                       'counts': counts,
                       'percentage': perc}).set_index('iceplant')
    print(df)
    print()

In [3]:
# Area of interest; passed to the `pp` path helpers to locate this site's files.
aoi = "carpinteria"

# Sample 2012

In [4]:
# Resolve the path of the 2012 polygons file for this AOI and load it with
# geopandas. Later cells read the `iceplant` and `naip_id` columns of the result.
fp = pp.path_to_polygons(aoi,2012)
polys12 = gpd.read_file(fp)

In [5]:
# Keep only the polygons labelled as iceplant (iceplant == 1), renumbered from 0.
# reset_index is chained so that no .loc slice is mutated in place.
polys_ice12 = polys12.loc[polys12.iceplant == 1].reset_index(drop=True)

# Sample points inside the iceplant polygons. The naip_id is taken from the
# first row by position, so it does not depend on the index labels
# (assumes every polygon in the file shares one naip_id -- TODO confirm).
# 0.9 is presumably the proportion of points drawn per polygon -- TODO confirm.
pts_ice12 = pp.naip_sample_proportion_no_warnings(polys_ice12, polys12.naip_id.iloc[0], 0.9)

# Total number of sampled points, then the number of points per polygon.
print(pts_ice12.shape[0])
pts_ice12.polygon_id.value_counts()

3433


22    691
20    472
0     350
9     276
19    204
17    191
18    163
1     117
7     109
21     93
2      85
3      78
24     75
23     67
25     62
16     59
26     54
12     52
5      43
27     35
28     32
4      25
14     20
10     18
13     15
15     13
8      13
11     11
6      10
Name: polygon_id, dtype: int64

In [6]:
# Keep only the polygons labelled as non-iceplant (iceplant == 0), renumbered from 0.
# reset_index is chained so that no .loc slice is mutated in place.
polys_nonice12 = polys12.loc[polys12.iceplant == 0].reset_index(drop=True)

# Sample points inside the non-iceplant polygons. The naip_id is taken from the
# first row by position, so it does not depend on the index labels
# (assumes every polygon in the file shares one naip_id -- TODO confirm).
# 0.9 and 1500 are presumably the sampling proportion and the maximum number
# of points kept per polygon -- TODO confirm against the pp helper.
pts_nonice12 = pp.naip_sample_sliding_no_warnings(polys_nonice12, polys12.naip_id.iloc[0], 0.9, 1500)

# Total number of sampled points, then the number of points per polygon.
print(pts_nonice12.shape[0])
pts_nonice12.polygon_id.value_counts()

15587


46    1500
36    1500
35    1500
34    1500
30    1430
40    1014
45     963
41     956
29     899
44     789
42     741
43     581
37     547
31     420
32     366
38     306
39     297
33     278
Name: polygon_id, dtype: int64

In [7]:
# Stack the 2012 iceplant and non-iceplant samples row-wise (the original index
# labels of both frames are kept) and report the resulting class balance.
pts12 = pd.concat([pts_ice12, pts_nonice12], axis=0)
iceplant_proportions(pts12['iceplant'])

no-iceplant:iceplant ratio     4.5 :1
          counts  percentage
iceplant                    
0          15587       81.95
1           3433       18.05



# Sample 2014

In [8]:
# Resolve the path of the 2014 polygons file for this AOI and load it with
# geopandas. Later cells read the `iceplant` and `naip_id` columns of the result.
fp = pp.path_to_polygons(aoi,2014)
polys14 = gpd.read_file(fp)

In [9]:
# Keep only the polygons labelled as iceplant (iceplant == 1), renumbered from 0.
# reset_index is chained so that no .loc slice is mutated in place.
polys_ice14 = polys14.loc[polys14.iceplant == 1].reset_index(drop=True)

# Sample points inside the iceplant polygons. The naip_id is taken from the
# first row by position, so it does not depend on the index labels
# (assumes every polygon in the file shares one naip_id -- TODO confirm).
# 0.9 is presumably the proportion of points drawn per polygon -- TODO confirm.
pts_ice14 = pp.naip_sample_proportion_no_warnings(polys_ice14, polys14.naip_id.iloc[0], 0.9)

# Total number of sampled points, then the number of points per polygon.
print(pts_ice14.shape[0])
pts_ice14.polygon_id.value_counts()

2221


15    904
14    407
12    126
0     118
1     103
9      80
2      68
11     61
5      55
16     54
13     38
17     35
4      34
18     32
3      29
7      22
6      19
19     16
10     13
8       7
Name: polygon_id, dtype: int64

In [10]:
# Keep only the polygons labelled as non-iceplant (iceplant == 0), renumbered from 0.
# reset_index is chained so that no .loc slice is mutated in place.
polys_nonice14 = polys14.loc[polys14.iceplant == 0].reset_index(drop=True)

# Sample points inside the non-iceplant polygons. The naip_id is taken from the
# first row by position, so it does not depend on the index labels
# (assumes every polygon in the file shares one naip_id -- TODO confirm).
# 0.8 and 1500 are presumably the sampling proportion and the maximum number
# of points kept per polygon -- TODO confirm against the pp helper.
pts_nonice14 = pp.naip_sample_sliding_no_warnings(polys_nonice14, polys14.naip_id.iloc[0], 0.8, 1500)

# Total number of sampled points, then the number of points per polygon.
print(pts_nonice14.shape[0])
pts_nonice14.polygon_id.value_counts()

14293


37    1500
27    1500
26    1428
25    1345
21    1271
31     901
36     856
32     850
20     799
35     701
33     659
34     516
28     486
22     373
23     325
29     272
30     264
24     247
Name: polygon_id, dtype: int64

In [11]:
# Stack the 2014 iceplant and non-iceplant samples row-wise (the original index
# labels of both frames are kept) and report the resulting class balance.
pts14 = pd.concat([pts_ice14, pts_nonice14], axis=0)
iceplant_proportions(pts14['iceplant'])

no-iceplant:iceplant ratio     6.4 :1
          counts  percentage
iceplant                    
0          14293       86.55
1           2221       13.45



# Sample 2018
1. Iceplant: sample 80% of the points in each polygon.
2. Non-iceplant: sample 80% of the points in small polygons, with at most 1500 points sampled per polygon.

In [12]:
# Resolve the path of the 2018 polygons file for this AOI and load it with
# geopandas. Later cells read the `iceplant` and `naip_id` columns of the result.
fp = pp.path_to_polygons(aoi,2018)
polys18 = gpd.read_file(fp)

In [13]:
# Keep only the polygons labelled as iceplant (iceplant == 1), renumbered from 0.
# reset_index is chained so that no .loc slice is mutated in place.
polys_ice18 = polys18.loc[polys18.iceplant == 1].reset_index(drop=True)

# Sample points inside the iceplant polygons. The naip_id is taken from the
# first row by position, so it does not depend on the index labels
# (assumes every polygon in the file shares one naip_id -- TODO confirm).
# 0.8 is presumably the proportion of points drawn per polygon -- TODO confirm.
pts_ice18 = pp.naip_sample_proportion_no_warnings(polys_ice18, polys18.naip_id.iloc[0], 0.8)

# Total number of sampled points, then the number of points per polygon.
print(pts_ice18.shape[0])
pts_ice18.polygon_id.value_counts()

5675


36    2117
0      698
34     440
31     231
33     206
41     195
35     172
2      162
37     135
5      113
8       98
38      87
11      86
1       81
39      80
4       60
40      54
25      53
7       51
14      51
32      51
16      43
10      41
3       39
28      35
9       32
24      29
17      29
26      26
21      25
15      23
23      18
20      18
27      17
13      17
22      15
12      13
6        9
30       8
29       6
19       6
18       5
Name: polygon_id, dtype: int64

In [14]:
# Keep only the polygons labelled as non-iceplant (iceplant == 0), renumbered from 0.
# reset_index is chained so that no .loc slice is mutated in place.
polys_nonice18 = polys18.loc[polys18.iceplant == 0].reset_index(drop=True)

# Sample points inside the non-iceplant polygons. The naip_id is taken from the
# first row by position, so it does not depend on the index labels
# (assumes every polygon in the file shares one naip_id -- TODO confirm).
# 0.8 and 1500 are presumably the sampling proportion and the maximum number
# of points kept per polygon -- TODO confirm against the pp helper.
pts_nonice18 = pp.naip_sample_sliding_no_warnings(polys_nonice18, polys18.naip_id.iloc[0], 0.8, 1500)

# Total number of sampled points, then the number of points per polygon.
print(pts_nonice18.shape[0])
pts_nonice18.polygon_id.value_counts()

23406


59    1500
58    1500
55    1500
49    1500
42    1500
54    1500
57    1500
53    1500
43    1500
47    1500
48    1500
56    1436
50    1350
44    1039
45     905
51     756
52     733
46     687
Name: polygon_id, dtype: int64

In [15]:
# Stack the 2018 iceplant and non-iceplant samples row-wise (the original index
# labels of both frames are kept) and report the resulting class balance.
pts18 = pd.concat([pts_ice18, pts_nonice18], axis=0)
iceplant_proportions(pts18['iceplant'])

no-iceplant:iceplant ratio     4.1 :1
          counts  percentage
iceplant                    
0          23406       80.49
1           5675       19.51



# Sample 2020

In [16]:
# Resolve the path of the 2020 polygons file for this AOI and load it with
# geopandas. Later cells read the `iceplant` and `naip_id` columns of the result.
fp = pp.path_to_polygons(aoi,2020)
polys20 = gpd.read_file(fp)

In [17]:
# Keep only the polygons labelled as iceplant (iceplant == 1), renumbered from 0.
# reset_index is chained so that no .loc slice is mutated in place.
polys_ice20 = polys20.loc[polys20.iceplant == 1].reset_index(drop=True)

# Sample points inside the iceplant polygons. The naip_id is taken from the
# first row by position, so it does not depend on the index labels
# (assumes every polygon in the file shares one naip_id -- TODO confirm).
# 0.7 is presumably the proportion of points drawn per polygon -- TODO confirm.
pts_ice20 = pp.naip_sample_proportion_no_warnings(polys_ice20, polys20.naip_id.iloc[0], 0.7)

# Total number of sampled points, then the number of points per polygon.
print(pts_ice20.shape[0])
pts_ice20.polygon_id.value_counts()

8197


44    1954
41    1023
0      555
40     442
38     412
39     305
33     275
42     201
37     180
47     162
2      158
46     151
45     146
50     134
51     118
49     106
5      103
11      94
3       92
48      90
4       81
14      78
7       78
52      76
28      75
1       71
53      70
27      67
43      65
34      63
10      58
22      58
13      57
8       52
16      40
18      38
30      38
15      36
26      33
36      32
21      31
12      30
29      30
9       29
20      26
24      26
35      26
17      25
23      23
6       19
32      16
31      16
54      16
19      10
25       7
Name: polygon_id, dtype: int64

In [18]:
# Keep only the polygons labelled as non-iceplant (iceplant == 0), renumbered from 0.
# reset_index is chained so that no .loc slice is mutated in place.
polys_nonice20 = polys20.loc[polys20.iceplant == 0].reset_index(drop=True)

# Sample points inside the non-iceplant polygons. The naip_id is taken from the
# first row by position, so it does not depend on the index labels
# (assumes every polygon in the file shares one naip_id -- TODO confirm).
# 0.9 and 1500 are presumably the sampling proportion and the maximum number
# of points kept per polygon -- TODO confirm against the pp helper.
pts_nonice20 = pp.naip_sample_sliding_no_warnings(polys_nonice20, polys20.naip_id.iloc[0], 0.9, 1500)

# Total number of sampled points, then the number of points per polygon.
print(pts_nonice20.shape[0])
pts_nonice20.polygon_id.value_counts()

24135


72    1500
67    1500
63    1500
69    1500
68    1500
62    1500
55    1500
70    1500
71    1500
66    1500
56    1500
60    1500
61    1500
57    1169
58    1018
64     850
65     825
59     773
Name: polygon_id, dtype: int64

In [19]:
# Stack the 2020 iceplant and non-iceplant samples row-wise (the original index
# labels of both frames are kept) and report the resulting class balance.
pts20 = pd.concat([pts_ice20, pts_nonice20], axis=0)
iceplant_proportions(pts20['iceplant'])

no-iceplant:iceplant ratio     2.9 :1
          counts  percentage
iceplant                    
0          24135       74.65
1           8197       25.35



# Save samples

In [20]:
# Write each year's combined sample to the CSV path that `pp` provides for
# this AOI and year. to_csv is called with its defaults, so the DataFrame
# index is written as the first column.
pts = [pts12, pts14, pts18, pts20]
years = [2012, 2014, 2018, 2020]

# Pair each sample with its year directly instead of looping over a
# hard-coded count, and fail loudly if the two lists ever get out of step.
assert len(pts) == len(years), 'pts and years must have the same length'
for year, pts_year in zip(years, pts):
    fp = pp.path_points_csv(aoi, year)
    pts_year.to_csv(fp)