In [1]:
import fiona
from madmex.models import PredictObject
from madmex.models import PredictClassification, Region, ValidClassification
import json
from shapely.geometry import mapping
from madmex.util.spatial import geometry_transform
from shapely.geometry import shape
import os
from operator import itemgetter
from django.contrib.gis.geos.geometry import GEOSGeometry
from fiona.crs import to_string
from dask.distributed import Client

In [2]:
# Name of the stored classification run to validate; used below to filter
# PredictClassification rows (both at notebook level and inside `fun`).
predict_name = 'lc_rf_s1_2_20m_resampled_10m_Jalisco_recipe_2017'

In [3]:
# One PredictClassification row per distinct predict_object_id for this run
# (distinct-on-field is a PostgreSQL-only Django feature).
qs_ids = PredictClassification.objects.filter(name=predict_name).distinct('predict_object_id')

In [4]:
# Materialise the distinct predict_object ids; each one is later passed to
# `fun` as `id_dc_tile`.
list_ids = [x.predict_object_id for x in qs_ids]

In [5]:
len(list_ids)

56

In [6]:
# Name of the Region row whose geometry is used to clip the validation polygons.
region = 'Jalisco'

In [7]:
# Load the region geometry from the database and convert it to a GeoJSON-like dict.
region_geom = Region.objects.get(name=region).the_geom
region_geojson = region_geom.geojson
geometry_region = json.loads(region_geojson)
# Lambert Conformal Conic (+proj=lcc) definition, in metres.
# NOTE(review): presumably this must match the CRS of the tile vector files
# opened inside `fun`, since the clip there mixes both geometries — confirm.
proj4='+proj=lcc +lat_1=17.5 +lat_2=29.5 +lat_0=12 +lon_0=-102 +x_0=2500000 +y_0=0 +a=6378137 +b=6378136.027241431 +units=m +no_defs'
# Region geometry handed to every `fun` task as `geometry_region_proj`.
geometry_region_proj = geometry_transform(geometry_region,proj4)

In [8]:
# Value matched against ValidClassification.valid_set; read as a global inside `fun`.
validation_set='validation_Jalisco'

In [9]:
# Target CRS (geographic lon/lat) for the tile extent before the spatial DB
# filter; read as a global inside `fun`.
proj4_out = '+proj=longlat'

In [10]:
def valid_object_to_feature(x, crs):
    """Build a GeoJSON-like feature dict from a validation record.

    Args:
        x: object exposing ``valid_tag.numeric_code`` and
            ``valid_object.the_geom`` (a geometry with ``geojson`` and
            ``transform(crs, clone=True)``).
        crs: target coordinate reference system handed to ``transform``,
            or None to keep the geometry as stored.

    Returns:
        dict: feature with keys ``type``, ``geometry`` (parsed GeoJSON) and
        ``properties`` (``{'class': numeric_code}``).
    """
    properties = {'class': x.valid_tag.numeric_code}
    geom = x.valid_object.the_geom
    if crs is not None:
        # clone=True returns a transformed copy instead of mutating in place
        geom = geom.transform(crs, clone=True)
    return {
        "type": "Feature",
        "geometry": json.loads(geom.geojson),
        "properties": properties,
    }

In [79]:
def fun(id_dc_tile, predict_name, geometry_region_proj):
    """Collect validation and predicted features for one tile.

    Reads the notebook-level globals ``proj4_out`` (CRS used for the spatial
    database filter) and ``validation_set`` (validation set identifier).

    Args:
        id_dc_tile: id of the PredictObject row describing the tile; its
            ``path`` is opened with fiona and its ``the_geom`` is the extent.
        predict_name: name of the PredictClassification run to read labels from.
        geometry_region_proj: GeoJSON-like geometry used to clip the validation
            polygons, or a falsy value to skip clipping.
            NOTE(review): assumed to be in the same CRS as the tile's vector
            file, since both are intersected directly — confirm.

    Returns:
        list: ``[fc_valid, fc_pred_intersect]`` where both items are lists of
        ``(geometry_mapping, numeric_code)`` tuples. ``fc_valid`` holds the
        validation polygons (clipped to the region when one is given);
        ``fc_pred_intersect`` holds one entry per (predicted polygon,
        validation polygon) pair that intersects, so a predicted polygon
        appears once for every validation polygon it touches.

    Raises:
        IndexError: if no PredictObject with ``id_dc_tile`` exists.
    """
    # Fetch the tile row once; indexing the queryset twice would hit the DB twice.
    tile = PredictObject.objects.filter(id=id_dc_tile)[0]
    s3_path = tile.path
    # Next lines reproject the extent registered in the DB.
    # TODO: register geometry of extent of each dc tile in lat long
    geometry = json.loads(tile.the_geom.geojson)
    with fiona.open(s3_path) as src:
        proj4_in = to_string(src.crs)
        geometry_proj = geometry_transform(geometry, proj4_out, crs_in=proj4_in)
        poly_proj = GEOSGeometry(json.dumps(geometry_proj))
        qs_dc_tile = ValidClassification.objects.filter(
            valid_object__the_geom__contained=poly_proj,
            valid_set=validation_set).prefetch_related('valid_object', 'valid_tag')

        valid_features = [valid_object_to_feature(x, proj4_in) for x in qs_dc_tile]
        if geometry_region_proj:
            # Keep only validation polygons touching the region, clipped to it.
            shape_region = shape(geometry_region_proj)
            fc_qs = []
            for feature in valid_features:
                valid_shape = shape(feature['geometry'])
                if shape_region.intersects(valid_shape):
                    fc_qs.append((mapping(shape_region.intersection(valid_shape)),
                                  feature['properties']['class']))
        else:
            # Same (geometry, tag) layout as the clipped branch, so the
            # intersection step and the callers see one consistent shape.
            fc_qs = [(feature['geometry'], feature['properties']['class'])
                     for feature in valid_features]
        valid_features = None

        # create fc with (geometry, tag) values
        pred_objects_sorted = PredictClassification.objects.filter(
            name=predict_name,
            predict_object_id=id_dc_tile).prefetch_related('tag').order_by('features_id')
        fc_pred_sorted = sorted(((x['properties']['id'], x['geometry']) for x in src),
                                key=itemgetter(0))
        # NOTE(review): pairing relies on the file's feature ids and the DB
        # rows' features_id lining up one-to-one after sorting; zip silently
        # truncates to the shorter side if they do not — confirm.
        fc_pred = [(feature[1], pred.tag.numeric_code)
                   for feature, pred in zip(fc_pred_sorted, pred_objects_sorted)]
        fc_pred_sorted = None
        pred_objects_sorted = None

        # intersect with fc of validation set
        # Build each shapely geometry once instead of once per pair.
        valid_shapes = [shape(valid_geom) for valid_geom, _ in fc_qs]
        fc_pred_intersect_validset = []
        if valid_shapes:
            for pred_geom, pred_code in fc_pred:
                pred_shape = shape(pred_geom)
                fc_pred_intersect_validset.extend(
                    (pred_geom, pred_code)
                    for valid_shape in valid_shapes
                    if pred_shape.intersects(valid_shape))
        fc_pred = None
    return [fc_qs, fc_pred_intersect_validset]

In [80]:
# Path of the dask scheduler description file passed to Client below.
# NOTE(review): hard-coded absolute path; it only exists where this volume is mounted.
scheduler_file= '/shared_volume/scheduler.json'

In [81]:
# Connect to the already-running dask cluster described by the scheduler file.
client = Client(scheduler_file=scheduler_file)

In [82]:
client

0,1
Client  Scheduler: tcp://100.96.3.14:8786  Dashboard: http://100.96.3.14:8787/status,Cluster  Workers: 3  Cores: 3  Memory: 36.00 GB


In [83]:
# Restart the cluster's workers before submitting work (drops any state and
# futures held from earlier runs).
client.restart()

0,1
Client  Scheduler: tcp://100.96.3.14:8786  Dashboard: http://100.96.3.14:8787/status,Cluster  Workers: 3  Cores: 3  Memory: 36.00 GB


In [84]:
# Submit one `fun` task per tile id. Only the first four ids are submitted
# here; the remaining entries of list_ids are not processed by this cell.
c = client.map(fun, list_ids[:4],
               predict_name=predict_name,
               geometry_region_proj=geometry_region_proj)

In [85]:
# Block until every task finishes and collect the per-tile
# [validation features, intersecting predicted features] pairs returned by `fun`.
result = client.gather(c)

In [110]:
# Flatten the first item of every per-tile result (validation features) into one list.
fc_valid_result = [feature for tile_result in result for feature in tile_result[0]]

In [111]:
# Flatten the second item of every per-tile result (predicted features that
# intersect the validation set) into one list.
fc_test_result = [feature for tile_result in result for feature in tile_result[1]]

In [112]:
len(fc_valid_result)

7

In [113]:
len(fc_test_result)

40

In [117]:
from madmex.validation import prepare_validation

In [119]:
# Turn the validation and predicted feature collections into the three inputs
# that `validate` takes below.
# NOTE(review): presumably paired labels plus per-pair weights — confirm
# against madmex.validation.prepare_validation.
y_true, y_pred, sample_weight = prepare_validation(fc_valid_result, fc_test_result)

In [123]:
len(y_true)

40

In [124]:
len(y_pred)

40

In [125]:
from madmex.validation import validate

In [126]:
# Compute the accuracy report for the 'madmex_31' scheme; the result is
# printed with pprint_val_dict in a later cell.
acc_dict = validate(y_true=y_true, y_pred=y_pred, sample_weight=sample_weight,
                    scheme='madmex_31')

  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [127]:
from madmex.validation import pprint_val_dict

In [128]:
pprint_val_dict(acc_dict)

Numeric code    User's Accuracy      Producer's Accuracy  Class Name                                        
2               0.00                 0.00                                                                   
3               0.99                 0.58                                                                   
11              0.79                 1.00                                                                   
17              0.00                 0.00                                                                   
28              0.97                 0.91                                                                   
31              1.00                 1.00                                                                   
-----
Overall Accuracy: 0.86

-----
Confusion matrix
-----
      | 2       3       11      17      28      31     
      | ------- ------- ------- ------- ------- -------
2     | 0.00    0.00    0.00    0.00    0.00    0.00   
3     | 24