In [None]:
import geojson
import geopandas as gpd
import pygeos
import descarteslabs as dl
import pandas as pd
from shapely import geometry, wkt
from shapely.ops import cascaded_union
import numpy as np
import json, os, sys
import yaml
import pickle
import matplotlib.pyplot as plt
from area import area
gpd.options.use_pygeos=False

In [None]:
# Repository root: three directory levels above the notebook's working directory.
root = os.path.abspath(os.path.join(os.getcwd(), *([os.pardir] * 3)))

- Combine the Airbus and cv polygons
- Get S2-shaped tiles for all polygons
- Filter those tiles to only those within the Airbus tiles

### Get Test Set Data

In [None]:
# Read the three test-set inputs from <root>/data: the AOI tiles, the
# hand-labelled polygons and the extra features appended to them in the next cell.
test_tiles, test_polys, extra_polys = (
    gpd.read_file(os.path.join(root, 'data', file_name))
    for file_name in (
        'testset_aois.geojson',
        'test_set_handlabelled.geojson',
        'extra_pr_fts.geojson',
    )
)

In [None]:
# Merge the extra features into the hand-labelled set. pd.concat replaces
# DataFrame.append (removed in pandas 2.0); ignore_index gives the result a
# unique 0..n-1 index instead of repeating the labels of both inputs.
# NOTE: re-running this cell appends extra_polys again; re-run the load cell first.
test_polys = pd.concat([test_polys, extra_polys], ignore_index=True)

In [None]:
# Get the interim vector products listed in the cloud-products config.
# The context manager closes the YAML file once it has been parsed.
with open(os.path.join(root,'cloud_products_exec.yaml'),'r') as config_file:
    config = yaml.safe_load(config_file)
for kk in config.keys():
    print (config[kk]['cloud_id'])
# A key is a vector product when its second '-'-separated field starts with 'V';
# keys without such a field are skipped instead of raising IndexError.
vector_keys = [kk for kk in config.keys() if '-' in kk and kk.split('-')[1].startswith('V')]
# NOTE(review): the first vector key is dropped here; the reason is not visible
# in this notebook - confirm it is intentional.
vector_keys=vector_keys[1:]
print(vector_keys)

In [None]:
# Dissolve all test tiles into one geometry; used below as the spatial filter
# for the cloud feature collections and for the final product.
test_tiles_mp = test_tiles.unary_union

In [None]:
# Dissolve all test polygons into one geometry.
# NOTE(review): test_polys_mp is not referenced again in the visible cells.
test_polys_mp = test_polys.unary_union

In [None]:
# Download the interim features that fall within the test tiles and cache each
# product as <root>/data/test_data/<key>.geojson.
for kk in vector_keys:
    fc = dl.vectors.FeatureCollection(config[kk]['cloud_id'])
    fts = [geojson.Feature(geometry=f.geometry, properties=f.properties) for f in fc.filter(test_tiles_mp).features()]
    print (kk, len(fts))
    # Context manager so the file is flushed and closed before a later cell reads it back.
    with open(os.path.join(root,'data','test_data',kk+'.geojson'),'w') as out_file:
        json.dump(geojson.FeatureCollection(fts), out_file)

In [None]:
# Preview the first rows only; rendering the whole frame bloats the notebook output.
test_polys.head()

### Get final product

In [None]:
# Load the final product polygons from <root>/data/ABCD_simplified.geojson.
final_polys=gpd.read_file(os.path.join(root,'data','ABCD_simplified.geojson'))

In [None]:
# Keep only the final polygons that intersect the dissolved test tiles.
final_polys = final_polys[final_polys.intersects(test_tiles_mp)]

In [None]:
# Save the test-area subset; it is read back below as gdfs['compiled_final'].
final_polys.to_file(os.path.join(root,'data','test_data','compile_final.geojson'),driver='GeoJSON')

### Load downloaded features

In [None]:
# Load every cached interim product (plus the pre-handlabel set) into a
# GeoDataFrame keyed by product name, then add the compiled final product.
gdfs = {}
for kk in vector_keys + ['pre-handlabel']:
    # Context manager so the file handle is closed as soon as it is parsed.
    with open(os.path.join(root,'data','test_data',kk+'.geojson'),'r') as in_file:
        big_json = json.load(in_file)
    # Drop all feature properties so each frame starts with geometry only.
    for ft in big_json['features']:
        ft['properties'] = {}
    gdfs[kk] = gpd.GeoDataFrame.from_features(big_json['features'])
    gdfs[kk] = gdfs[kk].set_crs('epsg:4326')
    print (kk)
gdfs['compiled_final']=gpd.read_file(os.path.join(root,'data','test_data','compile_final.geojson'))

### Make sure pre-handlabel features intersect SPOT-V2 or S2-V4

In [None]:
# Spatial index over the S2-V4-Final geometries followed by the SPOT-V2-Filtered ones.
pg_geoms = [
    pygeos.io.from_shapely(shape)
    for product_key in ('S2-V4-Final', 'SPOT-V2-Filtered')
    for shape in gdfs[product_key]['geometry'].values
]
intersect_tree = pygeos.STRtree(pg_geoms)

In [None]:
# Bulk 'intersects' query of every pre-handlabel geometry against the tree.
# The next cell uses row 0 of Q as the positions of the queried geometries that had a hit.
Q = intersect_tree.query_bulk([pygeos.io.from_shapely(geom) for geom in gdfs['pre-handlabel']['geometry'].values], predicate='intersects')

In [None]:
# Keep only the pre-handlabel features that intersect at least one indexed geometry.
# Q[0] holds positions in the queried list, so select by position (iloc) instead of
# matching them against index labels, which is only right for a pristine 0..n-1 index.
gdfs['pre-handlabel'] = gdfs['pre-handlabel'].iloc[np.unique(Q[0,:])]

In [None]:
# Persist the filtered pre-handlabel features as GeoJSON under <root>/data.
gdfs['pre-handlabel'].to_file(os.path.join(root,'data','prehandlabel_filtered.geojson'), driver='GeoJSON')

#### Get Area

In [None]:
# Area of each test tile, computed by the `area` package from its GeoJSON mapping.
test_tiles['geoarea'] = test_tiles['geometry'].apply(lambda el: area(geometry.mapping(el)))

In [None]:
# Area of each test polygon (same method as the tiles); used for all area binning below.
test_polys['geoarea'] = test_polys['geometry'].apply(lambda el: area(geometry.mapping(el)))

In [None]:
# Attach the same `area`-package area column to every product frame.
for kk, product_frame in gdfs.items():
    product_frame['geoarea'] = product_frame['geometry'].apply(
        lambda shape: area(geometry.mapping(shape))
    )

#### Some filters on S2: area > 30 m^2; cascaded union, then rebuild the frame

In [None]:
# Histogram of log10(area) for the pre-handlabel features (50 bins).
np.log10(gdfs['pre-handlabel']['geoarea']).hist(bins=50)

In [None]:
# Counts and bin edges of log10(area) for the S2-V2-Secondary product (50 bins).
res = np.histogram(np.log10(gdfs['S2-V2-Secondary']['geoarea']), bins=50)

In [None]:
# Pair each bin count with its lower edge converted back from log10 to area;
# zip stops at the 50 counts, so the final upper edge is not shown.
print (list(zip(10**res[1],res[0])))

In [None]:
# For each S2 product: drop polygons of 30 m^2 or less, dissolve overlapping
# polygons, and rebuild the frame from the dissolved parts.
for key in ['S2-V1-Primary','S2-V2-Secondary', 'S2-V3-Deepstack', 'S2-V4-Final']:
    gdfs[key] = gdfs[key][gdfs[key]['geoarea']>30]
    # GeoSeries.unary_union replaces shapely's cascaded_union (deprecated, removed in Shapely 2.0).
    mp = gdfs[key].geometry.unary_union
    if mp is None or mp.is_empty:
        # nothing survived the area filter
        parts = []
    elif hasattr(mp, 'geoms'):
        # multi-part result: members are reached through .geoms
        parts = list(mp.geoms)
    else:
        # everything merged into (or only left) a single Polygon
        parts = [mp]
    print (key, len(parts))
    gdfs[key] = gpd.GeoDataFrame(geometry=parts, crs='epsg:4326')
    # Same `area`-package measure as every other 'geoarea' column in this notebook, so
    # these frames fall into the same area bins (planar EPSG:3857 area grows with latitude).
    gdfs[key]['geoarea'] = gdfs[key]['geometry'].apply(lambda el: area(geometry.mapping(el)))

### precision / recall - full areas

In [None]:
# area-binned object level recall -> bootstrap

In [None]:
# Area bin edges; consecutive pairs are used below as half-open bins [lower, upper).
area_bins = [30,1e2,1e3,1e4,1e5,1e6,1e10]

In [None]:
# Precision/recall results, filled below as res_dict[product_key][bin_index][metric].
res_dict= {}

In [None]:
# Spatial index over all test polygons, queried by the precision computation below.
test_tree = pygeos.STRtree(list(map(pygeos.io.from_shapely, test_polys['geometry'].values)))

In [None]:

# Object-level precision and recall of every product in gdfs against the test
# polygons, computed separately for each area bin.
for kk,gdf in gdfs.items():
    res_dict[kk] = {}
    # spatial index over this product's geometries, used for the recall query
    gdf_tree = pygeos.STRtree([pygeos.io.from_shapely(geom) for geom in gdf['geometry'].values])
    for ii_a in range(len(area_bins)-1):
        res_dict[kk][ii_a] = {}

        # precision = TP/(TP+FP): product polygons in this bin that intersect any test polygon
        bin_slice = gdf[(gdf.geoarea>=area_bins[ii_a]) & (gdf.geoarea<area_bins[ii_a+1])]
        if len(bin_slice)>0:
            Q = test_tree.query_bulk([pygeos.io.from_shapely(geom) for geom in bin_slice['geometry'].values], predicate='intersects')
            # Q[0] lists the positions (within bin_slice) that had at least one hit
            intersects = np.isin(np.arange(len(bin_slice)), np.unique(Q[0,:]))
            TP = np.sum(intersects)
            FP = np.sum(~intersects)
            res_dict[kk][ii_a]['P']=TP/(TP+FP)
            res_dict[kk][ii_a]['P_TP'] = TP
            res_dict[kk][ii_a]['P_FP'] = FP
        else:
            # no detections in that size
            res_dict[kk][ii_a]['P']=np.nan
            res_dict[kk][ii_a]['P_TP'] = 0
            res_dict[kk][ii_a]['P_FP'] = 0

        # recall = TP/(TP+FN): test polygons in this bin that intersect any product polygon
        test_slice = test_polys[(test_polys.geoarea>=area_bins[ii_a])&(test_polys.geoarea<area_bins[ii_a+1])]
        if len(test_slice)>0:
            Q = gdf_tree.query_bulk([pygeos.io.from_shapely(geom) for geom in test_slice['geometry'].values], predicate='intersects')
            intersects = np.isin(np.arange(len(test_slice)), np.unique(Q[0,:]))
            TP = np.sum(intersects)
            FN = np.sum(~intersects)
            res_dict[kk][ii_a]['R']=TP/(TP+FN)
            res_dict[kk][ii_a]['R_TP'] = TP
            res_dict[kk][ii_a]['R_FN'] = FN
        else:
            # no test polygons of that size: recall is undefined; mirror the
            # precision branch instead of evaluating 0/0
            res_dict[kk][ii_a]['R']=np.nan
            res_dict[kk][ii_a]['R_TP'] = 0
            res_dict[kk][ii_a]['R_FN'] = 0

        print (kk,area_bins[ii_a],res_dict[kk][ii_a])


In [None]:
# Persist the all-bins precision/recall results; the context manager guarantees
# the pickle is flushed and the handle closed.
with open(os.path.join(root,'data','test_data','res_dict_all.pickle'),'wb') as out_file:
    pickle.dump(res_dict, out_file)

In [None]:
# CRS of all frames in this notebook. The authority string replaces the
# deprecated {'init': 'epsg:4326'} dict form and matches the set_crs call used when loading.
crs = 'epsg:4326'

**IOU**

In [None]:
import networkx as nx

In [None]:
# Area-based intersection and union of every product in gdfs against the test
# polygons, aggregated per area bin.
# Side effect: the helper columns written to test_polys are overwritten for each
# product, so after the loop they hold the values for the last key in gdfs.
iou_dict = {}
for kk,gdf in gdfs.items():
    iou_dict[kk] = {}

    # NaN marks product polygons that touch no test polygon at all
    gdf['component_idx'] = np.nan

    # make the gdf tree
    gdf_tree = pygeos.STRtree([pygeos.io.from_shapely(geom) for geom in gdf['geometry'].values])

    # get the intersection groups with the full test set and create the adjacency graph
    Q = gdf_tree.query_bulk([pygeos.io.from_shapely(geom) for geom in test_polys['geometry'].values], predicate='intersects')
    G = nx.Graph()
    edges = [(f'test_{a}',f'pipe_{b}') for a,b in zip(Q[0,:],Q[1,:])]
    G.add_edges_from(edges)

    # use the connected components to map intersection on to the test set df
    print (kk, 'doing prep...')
    test_polys['intersection_geom'] = ''
    test_polys['union_area'] = np.nan
    test_polys['area_portion'] = np.nan
    for ii_g, g in enumerate(nx.connected_components(G)):
        # node names are 'test_<position>' / 'pipe_<position>'
        test_idxs = [int(stridx.split('_')[1]) for stridx in list(g) if 'test' in stridx]
        pipe_idxs = [int(stridx.split('_')[1]) for stridx in list(g) if 'pipe' in stridx]

        # mark the component on the gdf
        gdf.iloc[pipe_idxs,gdf.columns.get_loc('component_idx')] = ii_g

        # The dissolved product geometry is the same for every test polygon of the
        # component, so build it (and its area) once rather than once per polygon.
        pipe_mp = gdf.iloc[pipe_idxs,:].unary_union
        pipe_mp_area = area(geometry.mapping(pipe_mp))

        for idx in test_idxs:
            geom_intersection = pipe_mp.intersection(test_polys.iloc[idx,test_polys.columns.get_loc('geometry')].buffer(0))
            test_polys.iloc[idx,test_polys.columns.get_loc('intersection_geom')] = geom_intersection.wkt

        # 'union_area' is the area of the component's dissolved product polygons;
        # 'area_portion' is each test polygon's share of the component's dissolved test area
        test_mp = test_polys.iloc[test_idxs,:].unary_union
        test_mp_area = area(geometry.mapping(test_mp))
        test_polys.iloc[test_idxs,test_polys.columns.get_loc('union_area')] = pipe_mp_area
        test_polys.iloc[test_idxs,test_polys.columns.get_loc('area_portion')] = test_polys.iloc[test_idxs,:].apply(lambda row: area(geometry.mapping(row['geometry']))/test_mp_area, axis=1)

    test_polys['intersection_area'] = test_polys['intersection_geom'].apply(lambda el: area(geometry.mapping(wkt.loads(el))) if not el=='' else 0)
    test_polys['specific_union_area'] = test_polys['area_portion'] * test_polys['union_area']

    for ii_a in range(len(area_bins)-1):

        iou_dict[kk][ii_a] = {}

        # then, for each area bin:
        ## sum the intersections -> intersection
        ## sum the component unions * the portion of component area
        ## ... and add any non-component geoms from the gdf -> union
        test_in_bin = (test_polys.geoarea>=area_bins[ii_a])&(test_polys.geoarea<area_bins[ii_a+1])
        gdf_in_bin = (gdf.geoarea>=area_bins[ii_a])&(gdf.geoarea<area_bins[ii_a+1])

        gross_intersection_area = test_polys.loc[test_in_bin, 'intersection_area'].sum()

        gross_union_area = test_polys.loc[test_in_bin, 'specific_union_area'].sum()
        gross_union_area = gross_union_area + gdf.loc[(gdf['component_idx'].isna()) & gdf_in_bin,'geoarea'].sum()

        iou_dict[kk][ii_a]['i'] = gross_intersection_area
        iou_dict[kk][ii_a]['u'] = gross_union_area
        # an empty bin has no union area: report NaN rather than dividing by zero
        iou_dict[kk][ii_a]['iou'] = gross_intersection_area / gross_union_area if gross_union_area > 0 else np.nan

        print (ii_a,iou_dict[kk][ii_a])
        
        


In [None]:
# Persist the all-bins IoU results; the context manager flushes and closes the file.
# NOTE(review): this is written to the working directory while res_dict_all.pickle
# goes to <root>/data/test_data - confirm the intended location.
with open('./iou_dict_all.pickle','wb') as out_file:
    pickle.dump(iou_dict, out_file)

### Area Error

In [None]:
from scipy.stats import norm

In [None]:
# Relative area error per test polygon: specific_union_area / geoarea - 1.
# specific_union_area is whatever the IoU loop above left on test_polys, i.e. the
# values computed for the last product it iterated over.
test_polys['area_error'] =  test_polys['specific_union_area'] / test_polys['geoarea'] - 1 

In [None]:
# Per area bin: histogram of the relative area error plus a normal fit (mu, sigma)
# and the sample size N, stored in area_dict[bin_index].
area_dict = {}
fig, axs = plt.subplots(1,len(area_bins)-1,figsize=(15,3))
# plt.subplots returns a bare Axes for a single panel; normalise so axs[ii_a] always works
axs = np.atleast_1d(axs)

for ii_a in range(len(area_bins)-1):
    # select the bin once and drop NaNs (test polygons with no matched product
    # polygon), so the histogram, the fit and N all use the same sample
    in_bin = (test_polys.geoarea>=area_bins[ii_a])&(test_polys.geoarea<area_bins[ii_a+1])
    errors = test_polys.loc[in_bin, 'area_error'].dropna().values
    axs[ii_a].hist(errors, bins=30)
    if errors.size:
        mu, sigma = norm.fit(errors)
    else:
        # nothing to fit in an empty bin
        mu, sigma = np.nan, np.nan
    N = errors.size
    area_dict[ii_a] = {'ii_a':ii_a,'mu':mu,'sigma':sigma,'N':N}

    print (ii_a,mu, sigma,N)

In [None]:
# Persist the area-error fit results; the context manager flushes and closes the file.
with open('./area_dict.pickle','wb') as out_file:
    pickle.dump(area_dict, out_file)

### precision/recall/iou - two areas

In [None]:
# Single bin covering areas from 1e4 up to (but excluding) 1e10.
# NOTE: this rebinds area_bins for every later cell that reads it.
area_bins = [1e4, 1e10]

In [None]:
# Start a fresh results dict for the single-bin run (discards the all-bins results in memory).
res_dict= {}

In [None]:
# Rebuild the spatial index over the test polygons for the single-bin run.
test_shapes = [pygeos.io.from_shapely(shape) for shape in test_polys['geometry'].values]
test_tree = pygeos.STRtree(test_shapes)

In [None]:
# Object-level precision and recall of every product in gdfs against the test
# polygons, for the area bins currently held in area_bins.
for kk,gdf in gdfs.items():
    res_dict[kk] = {}
    # spatial index over this product's geometries, used for the recall query
    gdf_tree = pygeos.STRtree([pygeos.io.from_shapely(geom) for geom in gdf['geometry'].values])
    for ii_a in range(len(area_bins)-1):
        res_dict[kk][ii_a] = {}

        # precision = TP/(TP+FP): product polygons in this bin that intersect any test polygon
        bin_slice = gdf[(gdf.geoarea>=area_bins[ii_a]) & (gdf.geoarea<area_bins[ii_a+1])]
        if len(bin_slice)>0:
            Q = test_tree.query_bulk([pygeos.io.from_shapely(geom) for geom in bin_slice['geometry'].values], predicate='intersects')
            # Q[0] lists the positions (within bin_slice) that had at least one hit
            intersects = np.isin(np.arange(len(bin_slice)), np.unique(Q[0,:]))
            TP = np.sum(intersects)
            FP = np.sum(~intersects)
            res_dict[kk][ii_a]['P']=TP/(TP+FP)
            res_dict[kk][ii_a]['P_TP'] = TP
            res_dict[kk][ii_a]['P_FP'] = FP
        else:
            # no detections in that size
            res_dict[kk][ii_a]['P']=np.nan
            res_dict[kk][ii_a]['P_TP'] = 0
            res_dict[kk][ii_a]['P_FP'] = 0

        # recall = TP/(TP+FN): test polygons in this bin that intersect any product polygon
        test_slice = test_polys[(test_polys.geoarea>=area_bins[ii_a])&(test_polys.geoarea<area_bins[ii_a+1])]
        if len(test_slice)>0:
            Q = gdf_tree.query_bulk([pygeos.io.from_shapely(geom) for geom in test_slice['geometry'].values], predicate='intersects')
            intersects = np.isin(np.arange(len(test_slice)), np.unique(Q[0,:]))
            TP = np.sum(intersects)
            FN = np.sum(~intersects)
            res_dict[kk][ii_a]['R']=TP/(TP+FN)
            res_dict[kk][ii_a]['R_TP'] = TP
            res_dict[kk][ii_a]['R_FN'] = FN
        else:
            # no test polygons of that size: recall is undefined; mirror the
            # precision branch instead of evaluating 0/0
            res_dict[kk][ii_a]['R']=np.nan
            res_dict[kk][ii_a]['R_TP'] = 0
            res_dict[kk][ii_a]['R_FN'] = 0

        print (kk,area_bins[ii_a],res_dict[kk][ii_a])


In [None]:
# Persist the single-bin precision/recall results; the context manager flushes and closes the file.
with open('./res_dict_10k.pickle','wb') as out_file:
    pickle.dump(res_dict, out_file)

In [None]:
# Area-based intersection and union of every product in gdfs against the test
# polygons, for the area bins currently held in area_bins.
# Side effect: the helper columns written to test_polys are overwritten for each
# product, so after the loop they hold the values for the last key in gdfs.
iou_dict = {}
for kk,gdf in gdfs.items():
    iou_dict[kk] = {}

    # NaN marks product polygons that touch no test polygon at all
    gdf['component_idx'] = np.nan

    # make the gdf tree
    gdf_tree = pygeos.STRtree([pygeos.io.from_shapely(geom) for geom in gdf['geometry'].values])

    # get the intersection groups with the full test set and create the adjacency graph
    Q = gdf_tree.query_bulk([pygeos.io.from_shapely(geom) for geom in test_polys['geometry'].values], predicate='intersects')
    G = nx.Graph()
    edges = [(f'test_{a}',f'pipe_{b}') for a,b in zip(Q[0,:],Q[1,:])]
    G.add_edges_from(edges)

    # use the connected components to map intersection on to the test set df
    print (kk, 'doing prep...')
    test_polys['intersection_geom'] = ''
    test_polys['union_area'] = np.nan
    test_polys['area_portion'] = np.nan
    for ii_g, g in enumerate(nx.connected_components(G)):
        # node names are 'test_<position>' / 'pipe_<position>'
        test_idxs = [int(stridx.split('_')[1]) for stridx in list(g) if 'test' in stridx]
        pipe_idxs = [int(stridx.split('_')[1]) for stridx in list(g) if 'pipe' in stridx]

        # mark the component on the gdf
        gdf.iloc[pipe_idxs,gdf.columns.get_loc('component_idx')] = ii_g

        # The dissolved product geometry is the same for every test polygon of the
        # component, so build it (and its area) once rather than once per polygon.
        pipe_mp = gdf.iloc[pipe_idxs,:].unary_union
        pipe_mp_area = area(geometry.mapping(pipe_mp))

        for idx in test_idxs:
            geom_intersection = pipe_mp.intersection(test_polys.iloc[idx,test_polys.columns.get_loc('geometry')].buffer(0))
            test_polys.iloc[idx,test_polys.columns.get_loc('intersection_geom')] = geom_intersection.wkt

        # 'union_area' is the area of the component's dissolved product polygons;
        # 'area_portion' is each test polygon's share of the component's dissolved test area
        test_mp = test_polys.iloc[test_idxs,:].unary_union
        test_mp_area = area(geometry.mapping(test_mp))
        test_polys.iloc[test_idxs,test_polys.columns.get_loc('union_area')] = pipe_mp_area
        test_polys.iloc[test_idxs,test_polys.columns.get_loc('area_portion')] = test_polys.iloc[test_idxs,:].apply(lambda row: area(geometry.mapping(row['geometry']))/test_mp_area, axis=1)

    test_polys['intersection_area'] = test_polys['intersection_geom'].apply(lambda el: area(geometry.mapping(wkt.loads(el))) if not el=='' else 0)
    test_polys['specific_union_area'] = test_polys['area_portion'] * test_polys['union_area']

    for ii_a in range(len(area_bins)-1):

        iou_dict[kk][ii_a] = {}

        # then, for each area bin:
        ## sum the intersections -> intersection
        ## sum the component unions * the portion of component area
        ## ... and add any non-component geoms from the gdf -> union
        test_in_bin = (test_polys.geoarea>=area_bins[ii_a])&(test_polys.geoarea<area_bins[ii_a+1])
        gdf_in_bin = (gdf.geoarea>=area_bins[ii_a])&(gdf.geoarea<area_bins[ii_a+1])

        gross_intersection_area = test_polys.loc[test_in_bin, 'intersection_area'].sum()

        gross_union_area = test_polys.loc[test_in_bin, 'specific_union_area'].sum()
        gross_union_area = gross_union_area + gdf.loc[(gdf['component_idx'].isna()) & gdf_in_bin,'geoarea'].sum()

        iou_dict[kk][ii_a]['i'] = gross_intersection_area
        iou_dict[kk][ii_a]['u'] = gross_union_area
        # an empty bin has no union area: report NaN rather than dividing by zero
        iou_dict[kk][ii_a]['iou'] = gross_intersection_area / gross_union_area if gross_union_area > 0 else np.nan

        print (ii_a,iou_dict[kk][ii_a])

In [None]:
# Persist the single-bin IoU results; the context manager flushes and closes the file.
with open('./iou_dict_10k.pickle','wb') as out_file:
    pickle.dump(iou_dict, out_file)

### Plot Data

In [None]:
# Load previously saved results for plotting (pickle is only safe on trusted files).
# NOTE(review): these dated files are not the ones written by the cells above -
# confirm they are the intended inputs and that their bins match area_bins.
with open('./res_dict_2020-04-07.pkl','rb') as in_file:  # open('../../data/res_dict.pkl','rb'))
    res_dict = pickle.load(in_file)
with open('./iou_dict_2020-04-07.pickle','rb') as in_file:  # open('../../data/iou_dict.pickle','rb'))
    iou_dict = pickle.load(in_file)

In [None]:
# Copy the 'iou' and 'iou_neg' values from iou_dict into the matching res_dict
# entries so that all plotted metrics live in one structure.
for key, vv in res_dict.items():
    for ar, vv2 in vv.items():
        iou_entry = iou_dict[key][ar]
        for metric in ('iou', 'iou_neg'):
            vv2[metric] = iou_entry[metric]
        # 'iou_pos' is deliberately not copied (was commented out in the original)

In [None]:
# Inspect the merged results.
res_dict

In [None]:
# Inspect the loaded IoU results.
iou_dict

In [None]:
from matplotlib.collections import LineCollection

In [None]:
# Panel titles keyed by the metric names used in res_dict.
title_dict = {'P':'Precision','R':'Recall','iou_neg':'Intersection-over-Union'}

In [None]:
# Switch matplotlib to the ggplot style; gg_colors below is read from its colour cycle.
plt.style.use('ggplot')

In [None]:
def hex2rgb(h):
    """Convert a hex colour such as '#rrggbb' (leading '#' optional) to [r, g, b] ints in 0-255."""
    digits = h.lstrip('#')
    channels = []
    # one two-character hex pair per channel: red, green, blue
    for start in range(0, 6, 2):
        channels.append(int(digits[start:start + 2], 16))
    return channels

In [None]:
# First three colours of the active matplotlib colour cycle as 0-1 RGB tuples.
gg_colors = [
    tuple(channel / 255 for channel in hex2rgb(cycle_entry['color']))
    for cycle_entry in list(plt.rcParams['axes.prop_cycle'])[:3]
]

In [None]:
# Inspect the three plot colours.
gg_colors

In [None]:
# Waterfall-style bar charts of precision, recall and IoU per pipeline stage:
# one row of panels per area bin, one column per metric.
# NOTE(review): area_bins is whatever an earlier cell last assigned; confirm it
# matches the bins stored in the loaded res_dict.
n_rows = len(area_bins)-1
# squeeze=False keeps axs two-dimensional even for a single area bin, so
# axs[row, col] indexing below is always valid.
fig, axs = plt.subplots(n_rows,3,figsize=(40,20),sharey=True, sharex=True, squeeze=False)
# NOTE(review): labels read 'S1-...' while the frames are keyed 'S2-...'; confirm the naming.
bar_labels = ['S1-V1','S1-V2','S1-V3','S1-V4','SPOT-V1','SPOT-V2','Final']
# plot P
for ii_a in range(n_rows):

    for ii_ax, M in enumerate(['P','R','iou_neg']):
        # Take the stage order from res_dict itself (as the single-measure plot
        # does): it was loaded from a pickle and need not share gdfs' keys.
        full_bars= [res_dict[kk][ii_a][M] for kk in res_dict.keys()]

        # bars 0-3 stack the increments of the first four stages, bars 4-5 stack
        # the next two, and bar 6 is drawn from zero
        bars = [full_bars[0]] + \
            [(full_bars[ii] - full_bars[ii-1]) for ii in range(1,4)] + \
            [full_bars[4], full_bars[5]-full_bars[4]] +\
            [full_bars[6]]
        bottoms = [0]+\
                    [full_bars[ii-1] for ii in range(1,4)] +\
                    [0,full_bars[4],0]

        # connector lines between consecutive bars
        lines_y = [el for el in full_bars for _ in (0,1)]
        lines_x = [0] + [el for el in range(1,6) for _ in (0,1)] + [6]
        segs = [[[lines_x[ii], lines_y[ii]],[lines_x[ii+1],lines_y[ii+1]]] for ii in range(0,12,2)]

        # reroute the connectors leaving bars 3 and 5 so they lead to the last bar's level
        segs[3][1][0]=3.5
        segs[5][1][0]=5.5
        segs.append([segs[3][1],[segs[3][1][0],full_bars[6]]])
        segs.append([[segs[3][1][0],full_bars[6]],[6,full_bars[6]]])
        segs.append([segs[5][1],[segs[5][1][0],full_bars[6]]])

        line_segments = LineCollection(segs, colors=[gg_colors[0]]*4 + [gg_colors[1]]*2 + [gg_colors[2]]*3, alpha=0.5)
        axs[ii_a,ii_ax].add_collection(line_segments)

        colors = [gg_colors[0]]*4 + [gg_colors[1]]*2 + [gg_colors[2]]

        # one x position per bar actually drawn
        axs[ii_a,ii_ax].bar(range(len(bars)),bars, bottom=bottoms, edgecolor=colors, linewidth=2,color=colors)

        for ii in range(7):
            H=0.05
            if ((M=='R' and ii_a==0) or (M=='iou_neg' and ii_a)):
                H=.1

            axs[ii_a,ii_ax].text(ii,H,f'{full_bars[ii]:.0%}', horizontalalignment='center')

        # pin the ticks to the bar positions before labelling them, instead of
        # relying on the automatic tick locations plus a leading blank label
        axs[ii_a,ii_ax].set_xticks(range(len(bar_labels)))
        axs[ii_a,ii_ax].set_xticklabels(bar_labels)

        if ii_a==0:
            axs[ii_a,ii_ax].set_title(title_dict[M],fontsize=24)
            axs[ii_a,ii_ax].set_ylim([0,1])

    axs[ii_a,0].set_ylabel(f'{area_bins[ii_a]:,.0f} to {area_bins[ii_a+1]:,.0f} m$^2$')

fig.savefig('../analysis/P-R-iou.png')

### Single measure 10k+

In [None]:
# Load the saved >10k results for plotting (pickle is only safe on trusted files).
# NOTE(review): the two-areas section writes './res_dict_10k.pickle' and
# './iou_dict_10k.pickle'; confirm these *_gt10k files are the intended inputs.
with open(os.path.join(root,'data','res_dict_gt10k.pickle'),'rb') as in_file:  # open('../../data/res_dict.pkl','rb'))
    res_dict = pickle.load(in_file)
with open(os.path.join(root,'data','iou_dict_gt10k.pickle'),'rb') as in_file:  # open('../../data/iou_dict.pickle','rb'))
    iou_dict = pickle.load(in_file)

In [None]:
# Fold the 'iou' and 'iou_neg' values of iou_dict into the corresponding res_dict entries.
for key in res_dict:
    vv = res_dict[key]
    for ar in vv:
        vv2 = vv[ar]
        vv2['iou'] = iou_dict[key][ar]['iou']
        vv2['iou_neg'] = iou_dict[key][ar]['iou_neg']
        # 'iou_pos' is deliberately not copied (was commented out in the original)

In [None]:
# NOTE(review): manual override - the precision of 'compiled_final' in bin 0 is set
# to 1.0 regardless of the loaded value. The justification is not visible in this
# notebook; confirm it is intended before publishing the figure.
res_dict['compiled_final'][0]['P']=1.0

In [None]:
# Inspect the merged >10k results.
res_dict

In [None]:
# Inspect the loaded >10k IoU results.
iou_dict

In [None]:
from matplotlib.collections import LineCollection
import matplotlib.ticker as mtick

In [None]:
# Panel titles keyed by metric name (same mapping as in the "Plot Data" section).
title_dict = {'P':'Precision','R':'Recall','iou_neg':'Intersection-over-Union'}

In [None]:
# Select the ggplot style (repeats the call made in the "Plot Data" section).
plt.style.use('ggplot')

In [None]:
def hex2rgb(h):
    """Return the [red, green, blue] 0-255 integers encoded by a 'rrggbb' hex string.

    Any leading '#' characters are ignored. This repeats the definition from the
    "Plot Data" section.
    """
    stripped = h.lstrip('#')
    red, green, blue = (int(stripped[lo:lo + 2], 16) for lo in (0, 2, 4))
    return [red, green, blue]

In [None]:
# First three colours of the active matplotlib colour cycle, rescaled from 0-255 to 0-1.
gg_colors = []
for cycle_entry in list(plt.rcParams['axes.prop_cycle'])[0:3]:
    gg_colors.append(tuple(level / 255 for level in hex2rgb(cycle_entry['color'])))

In [None]:
# Inspect the three plot colours.
gg_colors

In [None]:
# Single bin from 1e4 up to (but excluding) 1e10, so the figure below has one row of panels.
area_bins = [1e4, 1e10]

In [None]:
# Waterfall-style bar charts of precision, recall and IoU per pipeline stage for
# the area bins in area_bins (a single row here), saved under <root>/makefigs.
n_rows = len(area_bins)-1
# squeeze=False always returns a 2-D axes array, which replaces the manual
# reshape and also works for more than one row.
fig, axs = plt.subplots(n_rows,3,figsize=(18,4),sharey=True, sharex=True, squeeze=False)
# NOTE(review): labels read 'S1-...' while the frames are keyed 'S2-...'; confirm the naming.
bar_labels = ['S1-V1','S1-V2','S1-V3','S1-V4','SPOT-V1','SPOT-V2','Final']
# plot P
for ii_a in range(n_rows):

    for ii_ax, M in enumerate(['P','R','iou_neg']):
        full_bars= [res_dict[kk][ii_a][M] for kk in res_dict.keys()]

        # bars 0-3 stack the increments of the first four stages, bars 4-5 stack
        # the next two, and bar 6 is drawn from zero
        bars = [full_bars[0]] + \
            [(full_bars[ii] - full_bars[ii-1]) for ii in range(1,4)] + \
            [full_bars[4], full_bars[5]-full_bars[4]] +\
            [full_bars[6]]
        bottoms = [0]+\
                    [full_bars[ii-1] for ii in range(1,4)] +\
                    [0,full_bars[4],0]

        # connector lines between consecutive bars
        lines_y = [el for el in full_bars for _ in (0,1)]
        lines_x = [0] + [el for el in range(1,6) for _ in (0,1)] + [6]
        segs = [[[lines_x[ii], lines_y[ii]],[lines_x[ii+1],lines_y[ii+1]]] for ii in range(0,12,2)]

        # reroute the connectors leaving bars 3 and 5 so they lead to the last bar's level
        segs[3][1][0]=3.5
        segs[5][1][0]=5.5
        segs.append([segs[3][1],[segs[3][1][0],full_bars[6]]])
        segs.append([[segs[3][1][0],full_bars[6]],[6,full_bars[6]]])
        segs.append([segs[5][1],[segs[5][1][0],full_bars[6]]])

        line_segments = LineCollection(segs, colors=[gg_colors[0]]*4 + [gg_colors[1]]*2 + [gg_colors[2]]*3, alpha=0.5)
        axs[ii_a,ii_ax].add_collection(line_segments)

        colors = [gg_colors[0]]*4 + [gg_colors[1]]*2 + [gg_colors[2]]

        # one x position per bar actually drawn
        axs[ii_a,ii_ax].bar(range(len(bars)),bars, bottom=bottoms, edgecolor=colors, linewidth=2,color=colors)

        for ii in range(7):
            H=0.05
            if ((M=='R' and ii_a==0) or (M=='iou_neg' and ii_a)):
                H=.1

            axs[ii_a,ii_ax].text(ii,H,f'{full_bars[ii]:.0%}', horizontalalignment='center')

        # pin the ticks to the bar positions before labelling them, instead of
        # relying on the automatic tick locations plus a leading blank label
        axs[ii_a,ii_ax].set_xticks(range(len(bar_labels)))
        axs[ii_a,ii_ax].set_xticklabels(bar_labels)

        if ii_a==0:
            axs[ii_a,ii_ax].set_title(title_dict[M],fontsize=24)
            axs[ii_a,ii_ax].set_ylim([0,1])

    # show the shared y axis as percentages (1.0 -> 100%)
    axs[ii_a,0].yaxis.set_major_formatter(mtick.PercentFormatter(1))

    #axs[ii_a,0].set_ylabel(f'Installation area > 10,000m$^2$')

fig.savefig(os.path.join(root,'makefigs','P-R-iou_single.png'))

### dDEP

In [None]:
# Read the cv tiles and cv polygons from <root>/data.
cv_tiles, cv_polys = (
    gpd.read_file(os.path.join(root, 'data', file_name))
    for file_name in ('cv_all_tiles.geojson', 'cv_all_polys.geojson')
)

In [None]:
# Dissolve the cv polygons into one MultiPolygon. When the dissolve collapses to a
# single Polygon there is no .geoms attribute, so wrap that polygon in a list instead.
cv_polys_union = cv_polys.unary_union
cv_polys_mp = geometry.MultiPolygon(list(getattr(cv_polys_union, 'geoms', [cv_polys_union])))

In [None]:
# Dissolve the cv tiles into one MultiPolygon. When the dissolve collapses to a
# single Polygon there is no .geoms attribute, so wrap that polygon in a list instead.
cv_tiles_union = cv_tiles.unary_union
cv_tiles_mp = geometry.MultiPolygon(list(getattr(cv_tiles_union, 'geoms', [cv_tiles_union])))

In [None]:
# Load <root>/data/ABCD_finalized.geojson.
# NOTE: this rebinds `gdf`, which earlier cells use as a loop variable.
gdf = gpd.read_file(os.path.join(root,'data','ABCD_finalized.geojson'))

In [None]:
# Keep only the features that intersect the dissolved cv tiles.
gdf_cv = gdf[gdf.intersects(cv_tiles_mp)]

In [None]:
# Write the cv subset to <root>/gdf_cv.shp (no driver is passed explicitly).
gdf_cv.to_file(os.path.join(root,'gdf_cv.shp'))

In [None]:
# Number of features that intersect the cv tiles.
len(gdf_cv)

In [None]:
# Number of cv polygons, for comparison with the count above.
len(cv_polys)

### Download pipeline interim features

In [None]:
# Reload the cloud-products config; the context manager closes the file after parsing.
# NOTE(review): this relative path climbs two directories, whereas the first load
# in this notebook uses `root` (three levels up) - confirm which file is intended.
with open('../../cloud_products_exec.yaml','r') as config_file:
    config = yaml.safe_load(config_file)

In [None]:
# List the cloud id of every configured product.
for kk in config:
    product_entry = config[kk]
    print (product_entry['cloud_id'])

In [None]:
# A key is a vector product when its second '-'-separated field starts with 'V';
# keys without such a field are skipped instead of raising IndexError.
vector_keys = [kk for kk in config.keys() if '-' in kk and kk.split('-')[1].startswith('V')]

In [None]:
# Drop the first vector key.
# NOTE(review): the reason is not visible in this notebook, and re-running this
# cell drops another key each time - confirm it is intentional.
vector_keys=vector_keys[1:]

In [None]:
# Inspect the vector product keys that will be downloaded.
vector_keys

In [None]:
# Download the interim features that fall within the cv tiles and cache each
# product as ../data/crossvalidation/<key>.geojson.
for kk in vector_keys:
    fc = dl.vectors.FeatureCollection(config[kk]['cloud_id'])
    fts = [geojson.Feature(geometry=f.geometry, properties=f.properties) for f in fc.filter(cv_tiles_mp).features()]
    print (kk, len(fts))
    # Context manager so each file is flushed and closed once written.
    with open('../data/crossvalidation/'+kk+'.geojson','w') as out_file:
        json.dump(geojson.FeatureCollection(fts), out_file)

In [None]:
        # NOTE(review): this cell is an unfinished fragment. It is indented like the body
        # of loops over `kk` and `ii_a` that are not part of the cell, and the
        # `test_slice =` assignment below has no right-hand side, so it cannot run as written.
        # iou = intersection / union
        bindf_slice = gdf[(gdf.geoarea>=area_bins[ii_a]) & (gdf.geoarea<area_bins[ii_a+1])]
        bindf_mp = bindf_slice.unary_union
        test_slice = 
        test_mp = test_slice.unary_union
        
        intersection=bindf_mp.intersection(test_mp)
        union=bindf_mp.union(test_mp)
        

        if not intersection.is_empty:
            #print (intersection)
        
            # a single Polygon is wrapped in a list; any other result is iterated into its parts
            if intersection.type=='Polygon':
                
                i_gdf = gpd.GeoDataFrame(geometry=[intersection], crs=crs)
            else:
                i_gdf = gpd.GeoDataFrame(geometry=list(intersection), crs=crs)

            # NOTE(review): both branches below are identical; for a single Polygon this
            # presumably should be `[union]`, as in the intersection case above - confirm.
            if union.type=='Polygon':
                u_gdf = gpd.GeoDataFrame(geometry=list(union), crs=crs)
            else:
                u_gdf = gpd.GeoDataFrame(geometry=list(union), crs=crs)

            # areas are taken after reprojecting to EPSG:3857
            i_gdf['geoarea'] = i_gdf.to_crs({'init': 'epsg:3857'}).area
            u_gdf['geoarea'] = u_gdf.to_crs({'init': 'epsg:3857'}).area

            iou_dict[kk][ii_a]['iou'] = i_gdf['geoarea'].sum() / u_gdf['geoarea'].sum()
            
        else:
            # empty intersection: IoU is recorded as 0
            iou_dict[kk][ii_a]['iou'] = 0
        
        
        print (kk,area_bins[ii_a],iou_dict[kk][ii_a])