<h1>Notebook to Make Collections of Cells Over Shapefile Features</h1>
<p>Calculating zonal statistics over many polygons using high-resolution datasets is a computationally expensive task. To split the task into chunks, this notebook creates one JSON file per feature in a shapefile layer (N files for N features). Each file contains the collection of grid-cell polygons that intersect that feature. After the N files have been created, each one can be run through the Enrich notebook individually.</p>

In [None]:
from osgeo import ogr, osr
from shapely import wkt
import geopandas as gpd
from shapely.geometry import box, Polygon, mapping
import math
import json

<h2>These are all variables that need to be set before running the notebook</h2>

In [None]:
#shapefile containing the polygons to split over (opened with ogr.Open; only layer 0 is read)
shapefilePath = "path_to_shapefile_containing_features_to_split_over"

#directory the chunk<i>.json files are written to (one file per shapefile feature).
#The file name is appended to this string directly, so it must end with a path separator.
outputDirectory = "path_to_store_output\\"

In [None]:
#input in latitude/longitude
#output: dataframe containing grid cell polygons, also in lat/lon
def makeGrid(minx, miny, maxx, maxy, res):
    """Cover a lat/lon bounding box with square grid cells of side ``res``.

    The box is projected to the UTM zone of its central longitude, tiled with
    ``res`` x ``res`` squares starting at the upper-left corner of the projected
    bounds, and the squares are projected back to EPSG:4326.

    Parameters
    ----------
    minx, miny, maxx, maxy : float
        Bounding box in EPSG:4326 (x = longitude, y = latitude).
    res : float
        Cell edge length in UTM units (metres). Must be positive.

    Returns
    -------
    geopandas.GeoDataFrame
        A single 'geometry' column of cell polygons in EPSG:4326. Cells are
        ordered column by column (increasing x), top to bottom within a column.
        The number of cells per axis is rounded up, so the last column/row may
        extend past the projected bounds. At least one cell is always returned,
        even for a zero-extent box (e.g. a point feature).

    Raises
    ------
    ValueError
        If ``res`` is not a positive number.
    """
    # `not res > 0` also rejects NaN, which `res <= 0` would let through.
    if not res > 0:
        raise ValueError(f"res must be a positive cell size, got {res!r}")

    bboxFrame = gpd.GeoDataFrame(
        {'geometry': [box(minx, miny, maxx, maxy)]},
        crs="EPSG:4326",
    )

    avg_longitude = (minx + maxx) / 2
    utm_zone = int(math.floor((avg_longitude + 180) / 6.) + 1)
    # Valid UTM zones are 1..60; a mean longitude of exactly 180 would
    # otherwise give zone 61, and anything below -180 a zone <= 0.
    utm_zone = min(max(utm_zone, 1), 60)
    utm_crs = f'+proj=utm +zone={utm_zone} +ellps=WGS84 +datum=WGS84 +units=m +no_defs'

    # project the bounding box to the UTM CRS so the cell size is in metres
    df_projected = bboxFrame.to_crs(utm_crs)
    xmin, ymin, xmax, ymax = df_projected.total_bounds

    # Round up so the whole extent is covered; force at least one cell per axis
    # so a degenerate (zero width/height) box still gets a cell.
    nx = max(1, math.ceil((xmax - xmin) / res))
    ny = max(1, math.ceil((ymax - ymin) / res))

    polygons = []
    left = xmin
    for _ in range(nx):
        top = ymax
        for _ in range(ny):
            polygons.append(Polygon([
                (left, top),
                (left + res, top),
                (left + res, top - res),
                (left, top - res),
            ]))
            top = top - res
        left = left + res

    grid = gpd.GeoDataFrame({'geometry': polygons}, crs=df_projected.crs)
    return grid.to_crs(4326)

In [None]:
#open shapefile to split over. Coordinates should be WGS84 4326
shapeFile = ogr.Open(shapefilePath)
#ogr.Open returns None (instead of raising) when GDAL exceptions are not enabled
if shapeFile is None:
    raise RuntimeError("Could not open shapefile: " + str(shapefilePath))
shapes = shapeFile.GetLayer(0)

#get shapefile projection
spatialRef = shapes.GetSpatialRef()

#edge length of each grid cell, in metres (passed to makeGrid as res)
cellSize = 250

forGeo = []
#loop through features in shapefile.
#Iterate the layer itself rather than calling GetFeature(i): GetFeature takes a
#feature ID, which is not guaranteed to run 0..count-1 for every data source.
for i, shapeFeature in enumerate(shapes):
    geom = shapeFeature.GetGeometryRef()
    if geom is None:
        #nothing to grid for a feature without geometry; its chunk index is skipped
        print("Skipping feature " + str(i) + ": no geometry")
        continue

    #envelope is the ogr name for bounding box, returned as (minx, maxx, miny, maxy)
    #https://gdal.org/python/osgeo.ogr.Geometry-class.html
    envMinX, envMaxX, envMinY, envMaxY = geom.GetEnvelope()

    #def makeGrid(minx, miny, maxx, maxy, res)
    thisGrid = makeGrid(envMinX, envMinY, envMaxX, envMaxY, cellSize)

    #get in shapely format
    geomFormat = wkt.loads(geom.ExportToWkt())
    #keep only the grid cells that actually touch the feature, not its whole bounding box
    precise_matches = thisGrid[thisGrid.intersects(geomFormat)]
    data = mapping(precise_matches.geometry)
    #don't need bbox for anything so delete it to reduce output file size
    data.pop('bbox', None)
    for cell in data['features']:
        cell.pop('bbox', None)

    with open(outputDirectory+"chunk"+str(i)+".json", "w") as f:
        json.dump(data,f)
    