<h1>Notebook to Make Collections of Cells Over Shapefile Features</h1>
<p>Calculating zonal statistics over many polygons using high-resolution datasets is computationally expensive. To split the task into chunks, this notebook creates one JSON file per feature in a shapefile layer (N files for N features); each file contains the collection of grid-cell polygons that intersect that feature. Once the N files have been created, each one can be run through the Enrich notebook independently.</p>

In [33]:
import json
import math
import os

import geopandas as gpd
from osgeo import ogr, osr
from shapely import wkt
from shapely.geometry import box, Polygon, mapping

In [28]:
def makeGrid(minx, miny, maxx, maxy, res):
    """Build a grid of square cells covering a longitude/latitude bounding box.

    The bounding box is projected to a UTM CRS whose zone is chosen from the
    box's mean longitude, tiled with ``res`` x ``res`` squares in that
    projected CRS, and the squares are then projected back to EPSG:4326.

    Parameters
    ----------
    minx, miny, maxx, maxy : float
        Bounding box in longitude/latitude (EPSG:4326).
    res : float
        Cell edge length in units of the projected CRS (metres, per the
        ``+units=m`` UTM definition used below). Must be positive.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per cell in a ``geometry`` column, in EPSG:4326. Cells are
        ordered by increasing projected x, and within each column by
        decreasing projected y. Tiling starts at the top-left corner of the
        projected bounds, so the last column/row may extend past the box.

    Raises
    ------
    ValueError
        If ``res`` is not a positive number.
    """
    # Reject zero, negative and NaN cell sizes up front; otherwise the cell
    # count below is a division by zero or silently yields an empty grid.
    if not res > 0:
        raise ValueError(f"res must be a positive number, got {res!r}")

    bbox = box(minx, miny, maxx, maxy)
    df = gpd.GeoDataFrame({'geometry': [bbox]}, crs="EPSG:4326")

    avg_longitude = (minx + maxx) / 2
    utm_zone = int(math.floor((avg_longitude + 180) / 6.)) + 1
    # A mean longitude of exactly 180 would give zone 61; valid zones are 1-60.
    utm_zone = min(max(utm_zone, 1), 60)
    utm_crs = f'+proj=utm +zone={utm_zone} +ellps=WGS84 +datum=WGS84 +units=m +no_defs'

    # Project the bounding box to the UTM CRS so cells can be sized in metres.
    df_projected = df.to_crs(utm_crs)
    xmin, ymin, xmax, ymax = df_projected.total_bounds

    # Always emit at least one cell per axis so a zero-width or zero-height
    # bounding box still gets a cell.
    nx = max(1, math.ceil((xmax - xmin) / res))
    ny = max(1, math.ceil((ymax - ymin) / res))

    polygons = []
    for i in range(nx):
        # Derive each edge from its index rather than accumulating additions,
        # which avoids floating-point drift across large grids.
        left = xmin + i * res
        right = left + res
        for j in range(ny):
            top = ymax - j * res
            bottom = top - res
            polygons.append(Polygon([(left, top), (right, top), (right, bottom), (left, bottom)]))

    grid = gpd.GeoDataFrame({'geometry': polygons}, crs=df_projected.crs)
    return grid.to_crs(4326)

In [36]:
# Configuration: edit these before running.
# The shapefile to split over; its coordinates should be WGS84 (EPSG:4326).
SHAPEFILE_PATH = r'path_to_shapefile_containing_features_to_split_over'
# Directory that receives one chunk<i>.json file per feature.
OUTPUT_DIR = r'path_to_store_output'
# Grid cell edge length passed to makeGrid (metres in the projected UTM CRS).
CELL_SIZE = 250

shapeFile = ogr.Open(SHAPEFILE_PATH)
if shapeFile is None:
    # ogr.Open returns None instead of raising when GDAL exceptions are disabled.
    raise RuntimeError(f"Could not open shapefile: {SHAPEFILE_PATH}")
shapes = shapeFile.GetLayer(0)

#get shapefile projection
spatialRef = shapes.GetSpatialRef()

# NOTE(review): forGeo is not used in this cell; kept in case another cell reads it.
forGeo = []

os.makedirs(OUTPUT_DIR, exist_ok=True)

#loop through features in shapefile
for i in range(shapes.GetFeatureCount()):
    # GetFeature looks a feature up by its feature ID (FID).
    # NOTE(review): assumes FIDs run 0..N-1 as in a plain shapefile - confirm for other formats.
    feature = shapes.GetFeature(i)
    if feature is None:
        print(f"Skipping index {i}: no feature with that FID")
        continue

    geom = feature.GetGeometryRef()
    if geom is None or geom.IsEmpty():
        print(f"Skipping feature {i}: null or empty geometry")
        continue

    #envelope is the ogr name for bounding box
    #https://gdal.org/python/osgeo.ogr.Geometry-class.html
    # GetEnvelope returns (minX, maxX, minY, maxY), hence the reordering below.
    env_minx, env_maxx, env_miny, env_maxy = geom.GetEnvelope()
    thisGrid = makeGrid(env_minx, env_miny, env_maxx, env_maxy, CELL_SIZE)

    #get in shapely format
    geomFormat = wkt.loads(geom.ExportToWkt())
    # Keep only the grid cells that actually touch the feature, not the whole bounding box.
    precise_matches = thisGrid[thisGrid.intersects(geomFormat)]
    data = mapping(precise_matches.geometry)

    #don't need bbox for anything so delete it to reduce output file size
    data.pop('bbox', None)
    for cell in data['features']:
        cell.pop('bbox', None)

    # os.path.join keeps the path portable; a hard-coded backslash only works on Windows.
    with open(os.path.join(OUTPUT_DIR, f"chunk{i}.json"), "w") as f:
        json.dump(data, f)
    

GEOGCS["WGS 84",
    DATUM["WGS_1984",
        SPHEROID["WGS 84",6378137,298.257223563,
            AUTHORITY["EPSG","7030"]],
        AUTHORITY["EPSG","6326"]],
    PRIMEM["Greenwich",0,
        AUTHORITY["EPSG","8901"]],
    UNIT["degree",0.0174532925199433,
        AUTHORITY["EPSG","9122"]],
    AXIS["Latitude",NORTH],
    AXIS["Longitude",EAST],
    AUTHORITY["EPSG","4326"]]
                                                 geometry
0       POLYGON ((83.86996 28.38577, 83.87251 28.38583...
1       POLYGON ((83.87003 28.38352, 83.87258 28.38358...
2       POLYGON ((83.87010 28.38127, 83.87264 28.38133...
3       POLYGON ((83.87016 28.37901, 83.87271 28.37907...
4       POLYGON ((83.87023 28.37676, 83.87278 28.37682...
...                                                   ...
871076  POLYGON ((86.58091 26.57943, 86.58342 26.57944...
871077  POLYGON ((86.58091 26.57718, 86.58342 26.57718...
871078  POLYGON ((86.58092 26.57492, 86.58343 26.57493...
871079  POLYGON ((86.58093 26.57266, 8