<h1>Run Enriched Polygons Through Trained OSM Completeness Model</h1>
<p>After enriching a collection of 250-m by 250-m grid cells, the output can be run through the trained OSM completeness model to produce predictions of OSM building footprint area.</p>

In [None]:
import json

import cartopy
import cartopy.crs as ccrs
import geopandas as gpd
import joblib
import matplotlib
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon as mpoly
from shapely.geometry import shape, box, mapping, Point, Polygon

<h2>These are all variables that need to be set before running the notebook</h2>

In [None]:
#file containing enriched cells on which to run the trained model
#(read with json.load; must hold a top-level 'features' list)
inputFile = 'path_to_input_file'
#file containing trained model .sav file from training notebook (loaded with joblib.load)
trainedModel = 'path_to_trained_model.sav'
#what output json file should be called (cells containing predicted area and actual built area values)
outputFile = 'path_to_output_file'

#directory for images to be output; image file names are appended directly
#to this string, so it must end with a path separator
outputDirectory = 'output_directory\\'

#predicted area value below which a cell will be considered "no built up area"
#(compared against the model's predicted area when plotting and summarizing)
noBuildingThresh=5000

In [None]:
#load data: the enriched cells, as a dict with a top-level 'features' list
#utf-8 is given explicitly so the read does not depend on the platform's
#default text encoding
with open(inputFile, 'r', encoding='utf-8') as data:
    x = json.load(data)

In [None]:
#create polygons for geodataframe: one shapely geometry per feature, in
#feature order; buffer(0) is applied to every geometry
#NOTE(review): buffer(0) is presumably there to clean up invalid geometries - confirm
polygons = [shape(cell['geometry']).buffer(0) for cell in x['features']]
print(len(polygons))

In [None]:
def returnFeatureVals(featureString, variable):
    """Collect one property from every feature of a feature-collection dict.

    featureString -- key to look up in each feature's 'properties' dict
    variable -- dict holding a 'features' list of feature dicts

    Returns a list with one value per feature, in feature order.
    Raises KeyError if a feature lacks 'properties' or the requested key.
    """
    collected = []
    for entry in variable['features']:
        collected.append(entry['properties'][featureString])
    return collected

In [None]:
#one column per property read from the features: the eleven model inputs
#followed by the mapped building area
applyFeatureDict = {
    name: returnFeatureVals(name, x)
    for name in (
        'ndbi', 'ndvi', 'savi', 'ui', 'viirs', 'slope',
        'texture', 'forest', 'popFB', 'popWP', 'popWSF', 'area',
    )
}
applyDF = pd.DataFrame.from_dict(applyFeatureDict)
#attach the cell geometries (EPSG:4326) and replace missing values with 0
applyGeoDF = gpd.GeoDataFrame(applyDF, crs=4326, geometry=polygons).fillna(0)

In [None]:
#load the trained model object from the .sav file named in trainedModel
#NOTE(review): joblib.load unpickles the file, which can execute arbitrary
#code - only load model files that come from a trusted source
model = joblib.load(trainedModel)

In [None]:
#predictor columns handed to the model
#NOTE(review): column order presumably has to match the order used when the
#model was trained - confirm against the training notebook
featureColumns = [
    'ndbi', 'ndvi', 'savi', 'ui', 'viirs', 'slope',
    'texture', 'forest', 'popFB', 'popWP', 'popWSF',
]
applyFeatureDF = applyGeoDF[featureColumns]
#mapped building area per cell, kept separate from the predictors
applyTargetDF = applyGeoDF['area']

In [None]:
#run the trained model on the predictor columns; the result is indexed per
#feature below, i.e. one predicted building footprint area per cell
y_apply = model.predict(applyFeatureDF)

In [None]:
#replace every feature's properties with just the predicted and the mapped area
for i,feature in enumerate(x['features']):
    #actual mapped building footprint area, read before the properties are erased
    area = feature['properties']['area']
    #erase other properties for smaller output file
    feature['properties'] = {
        #predicted OSM building footprint area; cast to a plain Python float
        #because json.dump cannot serialize numpy scalar types such as float32
        'parea': float(y_apply[i]),
        #actual mapped building footprint area
        'builtArea': area,
    }

In [None]:
#saving output: write the trimmed feature collection to the output json file
with open(outputFile, mode='w') as outHandle:
    json.dump(x, fp=outHandle)

<h2>Plotting output for quick visualization</h2>

In [None]:
#import as dictionary
with open(outputFile,"r") as data:
    x = json.load(data)
#load as dataframe; only used to get the bounding box of all cells
plotdf = gpd.read_file(outputFile)

#total_bounds is (minx, miny, maxx, maxy); reorder to the
#[minx, maxx, miny, maxy] layout that is later passed to set_extent
minx, miny, maxx, maxy = plotdf.total_bounds
bounds = [minx, maxx, miny, maxy]

#center of the bounding box, used as the center of the map projection
clat = (miny+maxy)/2
clon = (minx+maxx)/2

features = x['features']
polys=[]
#predicted area
vals = []
#completeness
vals2 = []
lws=[]

#the projections are the same for every cell, so build them once
lambert = ccrs.LambertConformal(central_latitude=clat,central_longitude=clon)
plateCarree = ccrs.PlateCarree()

for i,feature in enumerate(features):
    #progress message every 1000 cells
    if (i%1000)==0:
        print(f'{i} of {len(features)}')

    properties = feature['properties']
    vals.append(properties['parea'])

    #completeness = mapped area / predicted area; a cell whose ratio cannot
    #be computed (zero prediction, missing or null mapped area) counts as 0
    try:
        vals2.append(properties['builtArea']/properties['parea'])
    except (ZeroDivisionError, TypeError, KeyError):
        vals2.append(0)

    #first ring of the geometry's coordinates; separate names are used for the
    #coordinate lists so the loaded dictionary x is not overwritten
    coords = feature['geometry']['coordinates'][0]
    lons = np.asarray([point[0] for point in coords])
    lats = np.asarray([point[1] for point in coords])
    #project the ring into map coordinates and keep only the x/y columns
    transformed = lambert.transform_points(plateCarree,lons,lats)
    polys.append(mpoly(transformed[:,0:2]))
    lws.append(0.05)

#matplotlib.cm.get_cmap was removed in matplotlib 3.9; pyplot.get_cmap is the
#supported way to look a colormap up by name
cmap = plt.get_cmap('viridis')
cmap2 = plt.get_cmap('RdYlGn')

#predicted area is scaled from 0 to the largest non-NaN prediction,
#completeness from 0 to 1
norm = matplotlib.colors.Normalize(vmin=0,vmax=np.nanmax(vals))
norm2 = matplotlib.colors.Normalize(vmin=0,vmax=1)
fcs = cmap(norm(vals))
#cap completeness at 1 so cells with more mapped than predicted area read as complete
vals2 = np.asarray(vals2, dtype=float)
vals2[vals2>1] = 1
fcs2 = cmap2(norm2(vals2))

#grey out cells whose predicted area is below the "no built up area" threshold
fcs2[np.asarray(vals)<noBuildingThresh] = (.9,.9,.9,1)


#one collection per map; edges use the face color so cell outlines do not show
pc=PatchCollection(polys, facecolors=fcs,edgecolors=fcs,linewidths=lws)
pc2=PatchCollection(polys, facecolors=fcs2,edgecolors=fcs2,linewidths=lws)


def _saveCollectionMap(collection, norm, cmap, title, label, fileName):
    """Draw one patch collection on a map with a colorbar and save it as an image.

    collection -- PatchCollection to draw on the map axes
    norm, cmap -- normalization and colormap shown in the colorbar
    title -- title of the map axes
    label -- colorbar label
    fileName -- image file name, appended to outputDirectory

    Uses the notebook-level clat, clon, bounds and outputDirectory values.
    """
    fig = plt.figure(constrained_layout = True)
    try:
        #20-row grid: the map takes the first 19 rows, the colorbar the last one
        gs = fig.add_gridspec(20,10)
        mapAx = fig.add_subplot(gs[0:19,:],projection=ccrs.LambertConformal(central_latitude=clat,central_longitude=clon))
        barAx = fig.add_subplot(gs[19::,:])
        mapAx.set_extent(bounds)
        mapAx.set_title(title,fontsize=10)
        mapAx.add_collection(collection)
        cb=fig.colorbar(matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap),
                     cax=barAx, orientation='horizontal')
        cb.ax.tick_params(labelsize=8)
        cb.set_label(label=label,fontsize=8)
        fig.savefig(outputDirectory+fileName,bbox_inches='tight',dpi=1000)
    finally:
        #always release the figure, even if drawing or saving fails
        plt.close(fig)


#map of the predicted building footprint area
_saveCollectionMap(pc, norm, cmap, 'Predicted Area', "Area (square meters)", 'predicted.png')

#map of completeness; the plotted value is actual (mapped) area divided by
#predicted area, so the title names the ratio in that order
_saveCollectionMap(pc2, norm2, cmap2, 'Ratio Actual to Predicted Area', "Ratio", 'ratio.png')

In [None]:
#quantitative assessment of unmapped footprint area. A built-up cell with a
#completeness ratio >= 0.5 is counted as mapped, anything below as unmapped
#built-up cells: predicted area at or above the "no built up area" threshold
builtUpIdx = np.nonzero(np.asarray(vals)>=noBuildingThresh)
builtUp = np.asarray(vals2)[builtUpIdx]

#predicted area of the built-up cells
areasBuilt = np.asarray(vals)[builtUpIdx]

completeIdx = np.nonzero(builtUp>=0.5)
incompleteIdx = np.nonzero(builtUp<0.5)
completeAreas = areasBuilt[completeIdx]
incompleteAreas = areasBuilt[incompleteIdx]

#completeness ratios of the two groups
complete = builtUp[completeIdx]
incomplete = builtUp[incompleteIdx]

#totals are sums of predicted area over the cells in each group
mappedBuildings=np.sum(completeAreas)
unmappedBuildings=np.sum(incompleteAreas)
print(f'{np.round(mappedBuildings)} meters squared has been mapped')
print(f'{np.round(unmappedBuildings)} meters squared needs to be mapped')
#with no built-up cells the total is 0 and the percentage is undefined, so
#report that instead of dividing by zero
totalBuildings = unmappedBuildings+mappedBuildings
if totalBuildings == 0:
    print('no built up area found, percent of footprint mapped is undefined')
else:
    print(f'{np.round(100*mappedBuildings/totalBuildings,decimals=1)} percent of footprint has been mapped')