Initial imports.

In [1]:
from osgeo import osr
import datacube
from datetime import datetime
import os
import django
import math
import json
from rasterio.features import rasterize
from django.contrib.gis.geos import Polygon, Point
from affine import Affine
import scipy.ndimage
from sklearn.ensemble import RandomForestClassifier
from datacube.storage import masking
import numpy    
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "madmex.settings")
django.setup()
from madmex.models import TrainObject

Create a workspace for our test.

In [2]:
dc = datacube.Datacube(app = 'load_test')

Two points that define a bounding box.

In [17]:
#min_lon = -102.925
#min_lat = 20.913
#max_lon = -102.92
#max_lat = 20.918

min_lon = -101.89179897308351
min_lat = 21.309446328840828
max_lon = -101.8318033218384
max_lat = 21.347023920881483

In [18]:
sr = dc.load(product='ls8_espa_mexico_uncompressed', 
             longitude=(min_lon, max_lon), 
             latitude=(min_lat, max_lat),
             time=(datetime(2017, 4, 1), datetime(2017, 5, 1)), 
             group_by='solar_day')
sr.geobox

GeoBox(207, 140, Affine(30.0, 0.0, 2511150.0,
       0.0, -30.0, 1043330.0), PROJCS["unnamed",GEOGCS["WGS 84",DATUM["unknown",SPHEROID["WGS84",6378137,6556752.3141]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic_2SP"],PARAMETER["standard_parallel_1",17.5],PARAMETER["standard_parallel_2",29.5],PARAMETER["latitude_of_origin",12],PARAMETER["central_meridian",-102],PARAMETER["false_easting",2500000],PARAMETER["false_northing",0]])

We use our bounding box to create a polygon that will be used to perform a query and retrieve objects from the database that fall in it (the bounding box is small so its easier to inspect what is happening). 

In [19]:
query_polygon = Polygon(((min_lon, max_lat), 
                         (max_lon, max_lat), 
                         (max_lon, min_lat), 
                         (min_lon, min_lat), 
                         (min_lon, max_lat)))
print(query_polygon)

POLYGON ((-101.8917989730835 21.34702392088148, -101.8318033218384 21.34702392088148, -101.8318033218384 21.30944632884083, -101.8917989730835 21.30944632884083, -101.8917989730835 21.34702392088148))


We define two points that will be used to create the affine transform. To create the points a srid must be specified.

In [20]:
ul_point = Point(min_lon, max_lat, srid=4326)
br_point = Point(max_lon, min_lat, srid=4326)

print(query_polygon.touches(ul_point))
print(query_polygon.touches(br_point))

print(ul_point.transform(sr.crs.wkt,clone=True))
print(br_point.transform(sr.crs.wkt,clone=True))




True
True
POINT (2511165.221038735 1043303.12799684)
POINT (2517361.028501985 1039145.799120035)


Load the objects from the database filtering with the bounding box that we defined earlier. Then the objects are ennumerated and tuples (geojson, index) are added to an empty array called shapes. The indexes are shifted by one because the default fill of the rasterize function is 0 so we don't want to have an object indexed with the 0 tag. In another array we save the tags relative to each object, in this case we are using the "level_1" tag.

In [100]:
tag_key = 'level_1'
shapes = []
tags = {}

shift = 1
for p, obj in enumerate(TrainObject.objects.filter(the_geom__contained=query_polygon)):
    
    
    if len(obj.training_tags.all()) > 0:
        print(p + 1)
        shapes.append((json.loads(obj.the_geom.transform(sr.crs.wkt,clone=True).geojson), p + 1, ))
        for tag in obj.training_tags.all():
            if tag.key == tag_key:
                tags[p + 1] = tag.value
    else:
        print("No training tag, object ommited.")
        shift = shift - 1
        

print(len(shapes)) 
print(tags)

(0.008) SELECT "madmex_trainobject"."id", "madmex_trainobject"."the_geom"::bytea, "madmex_trainobject"."added", "madmex_trainobject"."dataset" FROM "madmex_trainobject" WHERE "madmex_trainobject"."the_geom" @ ST_GeomFromEWKB('\x0103000020e610000001000000050000000100003c137959c09c1d478fd6583540010000443c7559c09c1d478fd6583540010000443c7559c0163de6df374f35400100003c137959c0163de6df374f35400100003c137959c09c1d478fd6583540'::bytea); args=(<django.contrib.gis.db.backends.postgis.adapter.PostGISAdapter object at 0x7fe74dacde48>,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 6; args=(6,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" 

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 187; args=(187,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 187; args=(187,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 188; args=(188,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 215; args=(215,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 216; args=(216,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 216; args=(216,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

No training tag, object ommited.
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39


(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 253; args=(253,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 253; args=(253,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 220; args=(220,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 224; args=(224,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 168; args=(168,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 168; args=(168,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 208; args=(208,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 208; args=(208,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 209; args=(209,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77


(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 161; args=(161,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 162; args=(162,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 162; args=(162,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 152; args=(152,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 152; args=(152,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 153; args=(153,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 226; args=(226,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 267; args=(267,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 267; args=(267,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114


(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 241; args=(241,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 241; args=(241,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 249; args=(249,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 269; args=(269,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 277; args=(277,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 277; args=(277,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 289; args=(289,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 289; args=(289,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 290; args=(290,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151


(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 287; args=(287,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 322; args=(322,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 322; args=(322,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 312; args=(312,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 312; args=(312,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 313; args=(313,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 324; args=(324,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 325; args=(325,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 325; args=(325,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188


(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 338; args=(338,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 339; args=(339,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 339; args=(339,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 351; args=(351,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 351; args=(351,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 352; args=(352,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 359; args=(359,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 360; args=(360,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 360; args=(360,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226


(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 371; args=(371,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 378; args=(378,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 378; args=(378,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 397; args=(397,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 397; args=(397,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 386; args=(386,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 400; args=(400,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 401; args=(401,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag"."key", "madmex_traintag"."value" FROM "madmex_traintag" INNER JOIN "madmex_trainobject_training_tags" ON ("madmex_traintag"."id" = "madmex_trainobject_training_tags"."traintag_id") WHERE "madmex_trainobject_training_tags"."trainobject_id" = 401; args=(401,)
(0.001) SELECT "madmex_traintag"."id", "madmex_traintag

227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
258
{2: 'praderas', 3: 'tierras forestales', 4: 'praderas', 5: 'praderas', 6: 'praderas', 7: 'praderas', 8: 'tierras forestales', 9: 'tierras forestales', 10: 'tierras forestales', 11: 'tierras forestales', 12: 'tierras forestales', 13: 'praderas', 14: 'tierras forestales', 15: 'tierras forestales', 16: 'tierras forestales', 17: 'tierras forestales', 18: 'praderas', 19: 'praderas', 20: 'praderas', 21: 'tierras forestales', 22: 'praderas', 23: 'tierras forestales', 24: 'praderas', 25: 'tierras forestales', 26: 'praderas', 27: 'praderas', 28: 'tierras forestales', 29: 'tierras forestales', 30: 'tierras forestales', 31: 'tierras forestales', 32: 'praderas', 33: 'praderas', 34: 'praderas', 35: 'tierras forestales', 36: 'tierras forestales', 37: 'praderas', 38: 'praderas', 39: 'praderas', 40: 'praderas', 41: 'tierras forestales', 42: 'tierras forestales', 43: '

An affine transform is defined to feed the rasterize function. It is needed to provide with dimension and resolution to the output mask. The coordinates that define our transformation must be in the same projection as our data, in this case I take the crs from our datacube data, and get it's well known text representation to transform our points.

In [101]:
ul_point = Point(min_lon, max_lat, srid=4326).transform(sr.crs.wkt,clone=True)
br_point = Point(max_lon, min_lat, srid=4326).transform(sr.crs.wkt,clone=True)

size_x = sr.dims['x']
size_y = sr.dims['y']

ulx = ul_point[0]
uly = ul_point[1]
brx = br_point[0]
bry = br_point[1]

shifted_affine = Affine((brx - ulx) / size_x, 0, ulx, 0, (bry-uly) / size_y, uly)

The objects are rasterized into a numpy array. Should be noticed that we have less objects after the rasterizing. I am not sure what is happening, I tested several things and my strongest hipotesis is that at this resolution, some objects overlap and they are overwritten depending on the moment in which the values are burnt.

In [102]:
print('number of objects from the database: %s' % len(shapes))
mask = rasterize(shapes, out_shape=(size_x,size_y), transform = shifted_affine, all_touched=False)
print('number of objects from the mask: %s' % len(numpy.unique(mask)))
print(mask)

number of objects from the database: 258
number of objects from the mask: 208
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


Remember that we shifted the object indexes by one? When we retrieve the tags from our array we need to shift back to get the values. (This is a bit hacky, there should be another way)

In [105]:
new_values = numpy.unique(mask)[1:]
print(new_values)

[  3   5   6   8   9  10  11  12  13  14  15  16  18  19  20  21  22  23
  24  25  26  27  28  29  30  31  32  33  34  35  36  37  41  42  43  45
  46  47  48  49  50  51  53  54  55  56  58  59  60  61  63  64  65  66
  67  69  71  72  73  74  75  76  77  78  79  80  83  84  85  86  90  91
  92  94  95  96  97  99 100 101 102 103 104 105 106 107 108 109 110 112
 113 114 115 116 117 118 119 120 121 123 125 126 127 128 129 130 131 132
 134 135 136 137 138 142 143 144 146 147 148 149 150 151 152 153 155 157
 158 159 160 161 162 163 167 169 170 171 172 174 175 177 178 179 180 181
 182 183 184 185 187 188 189 191 192 193 194 195 196 197 198 199 200 201
 202 203 204 205 206 207 208 209 210 211 212 214 215 216 217 218 219 221
 224 226 227 228 229 230 233 234 235 236 240 241 242 243 244 245 246 247
 248 251 252 253 254 255 256 257 259]


In [107]:
new_tags = [tags[index] for index in new_values]


print(len(new_values))
print(len(new_tags))

207
207


We build our X and y objects by masking the surface reflectance object using the pixel_qa and taking the mean of the pixels using the time dimension. We then apply the zonal statistics for the mean using the mask that we got from the objects.

In [195]:
clear = masking.make_mask(sr.pixel_qa, clear=True)
sr_clear = sr.where(clear)
sr_clear_mean = sr_clear.mean('time')


index = numpy.unique(mask)
labels = numpy.transpose(mask)

layers = ['green', 'red', 'blue', 'nir', 'swir1', 'swir2']
statistics = ['mean', 'maximum', 'minimum', 'variance', 'standard_deviation', 'sum']


X = calculate_zonal_statistics_by_layer(sr_clear_mean, labels, index, statistics, layers)

'''
X = numpy.transpose(numpy.array([scipy.ndimage.measurements.mean(sr_clear_mean.red.values, labels, index), 
                                 scipy.ndimage.measurements.mean(sr_clear_mean.blue.values, labels, index),
                                 scipy.ndimage.measurements.mean(sr_clear_mean.green.values, labels, index),
                                 scipy.ndimage.measurements.mean(sr_clear_mean.nir.values, labels, index),
                                 scipy.ndimage.measurements.mean(sr_clear_mean.swir1.values, labels, index),
                                 scipy.ndimage.measurements.mean(sr_clear_mean.swir2.values, labels, index)]))
'''

imp = Imputer(missing_values='NaN', strategy='mean', axis=0)


imp.fit(X)

mapping = numpy.unique(new_tags, return_inverse=True)


X = X[1:,:]

y = mapping[1]

print(X.shape)
print(y.shape)

(207, 36)
(207,)


  if np.issubdtype(dtype, float):
  elif np.issubdtype(dtype, int):
  means = sums / counts


A random forest classifier is trained with that data.

In [196]:
clf = RandomForestClassifier(random_state=0,oob_score=True)
clf.fit(numpy.nan_to_num(X),numpy.nan_to_num(y))

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=True, random_state=0, verbose=0, warm_start=False)

A testing array is produced from the very same raster that we retrieved from the datacube. We will predict on this raster at a pixel level. It is just a test as it is kind of sneaky to use the same set to predict.

In [197]:
x_size = len(sr_clear_mean.coords['x'])
y_size = len(sr_clear_mean.coords['y'])

print(x_size, y_size)
pseudo_mask = numpy.arange(x_size * y_size).reshape(y_size, x_size)

pseudo_mask



print(X_test.shape)

207 140
(28980, 36)


In [219]:
from sklearn.preprocessing import Imputer

X_test = calculate_zonal_statistics_by_layer(sr_clear_mean, pseudo_mask, numpy.unique(pseudo_mask), statistics, layers)
print(X_test.shape)

na_mask= numpy.ma.masked_invalid(X_test)


X_test_imp = numpy.ma.masked_array(X_test, numpy.isnan(X_test))

print(numpy.unique(X_test_imp, return_counts=True))

print(X_test_imp.shape)

(28980, 36)
(masked_array(data=[0.0, 108.0, 123.0, ..., 11021.0, 11155.0, --],
             mask=[False, False, False, ..., False, False,  True],
       fill_value=1e+20), array([341796,      4,      4, ...,      4,      4,  17892]))
(28980, 36)


This is the array that we obtain after the prediction.

In [217]:
prediction = clf.predict(X_test_imp)

print(prediction.reshape(y_size, x_size))
print(numpy.unique(prediction, return_counts=True))
print(numpy.unique(y, return_counts=True))

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

We use the training data to create a raster with the tags to compare with our result. The no data value is -9.

In [148]:
final = numpy.array([[numpy.where(mapping[0] == tags[e-1])[0][0] if e > 0 else -9 for e in row ] for row in mask])
print(final)
        


[[-9 -9 -9 ... -9 -9 -9]
 [-9 -9 -9 ... -9 -9 -9]
 [-9 -9 -9 ... -9 -9 -9]
 ...
 [-9 -9 -9 ... -9 -9 -9]
 [-9 -9 -9 ... -9 -9 -9]
 [-9 -9 -9 ... -9 -9 -9]]


In [17]:
clf.score(numpy.nan_to_num(X),numpy.nan_to_num(y))

1.0

In [18]:
from madmex.data.munge import stats

In [117]:
from importlib import import_module

def calculate_zonal_statistics(array, labels, index, statistics):
    '''
    Receives an array with labels and indexes for those labels. It calculates the zonal
    statistics for those labels. Statistics are the target functions to be applied, it should
    cointain strings from the set: ('mean', 'maximum', 'median', 'minimum', 'standard_deviation',
    'variance','sum')
    
    Args:
        array (numpy.array): Array to which statitstics will be applied
        labels (numpy.array): Labels for the statistics of interest
        index (numpy.array): Positions in which the statistics can be found
        statistics (string array): Functions to be applied

    Return:
        zonal_statistics (numpy.array): The calculated statistics
    
    '''
    results = []
    module = import_module('scipy.ndimage.measurements')
    for statistic in statistics:
        function = getattr(module, statistic)
        stat = function(array, labels=labels, index=index)
        results.append(stat)
    zonal_statistics = numpy.asarray(results).transpose()  
    return zonal_statistics


def calculate_zonal_statistics_by_layer(array, labels, index, statistics, layers):
    stats = numpy.zeros(( len(numpy.unique(labels)) , len(layers) * len(statistics) ))
    i = 0
    for layer in layers:
        st = calculate_zonal_statistics(array[layer].values, labels, index, statistics)
        ind = i * st.shape[1]
        i = i + 1
        stats[:,ind:ind+len(statistics)] = st
    return stats

In [112]:
layers = ['green', 'red', 'blue', 'nir', 'swir1', 'swir2']
statistics = ['mean', 'maximum', 'sum']

stats = numpy.zeros(( len(numpy.unique(mask)) , len(layers) * len(statistics) ))
#stats = numpy.random.randint(9, size=(34,8))
#print(stats)
for i in range(len(layers)):
    st = stats2(sr_clear_mean[layers[i]].values, labels, index, statistics)
    ind = i * st.shape[1]
    #print ("stats for band:", layers[i])
    #print (st)
    stats[:,ind:ind+len(statistics)] = st
#print("-"*30)
print(stats)

    
    

NameError: name 'stats2' is not defined

In [120]:
data = numpy.array(stats)

In [121]:
len(data[1])

18

In [112]:
data[1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0.])

In [113]:
print(dir(import_module('scipy.ndimage.measurements')))

