# Cleaning up catchments

ArcGIS's Cost/Distance Allocation tools are the most efficient method of generating catchments for large collections of point features from friction surfaces. However, the catchment extents need to be linked back to the point features, and certain artefacts need to be cleaned up afterwards. This notebook helps with both steps.

In [1]:
import os, sys
import pandas as pd
import geopandas as gpd
import shapely
from shapely.geometry import shape, JOIN_STYLE

import re
import numpy as np
import rasterio
from rasterio.mask import mask
import rasterstats
from rasterstats import zonal_stats

#### Set directories

In [2]:
# Root of the shared geodata drive; the population and landcover rasters are read from here
geo_dir = r'P:\PAK\GEO'
# Project data folder, given relative to the notebook's working directory
data_dir = r'../../data'
# Sub-folders of data_dir: input catchment shapefiles / settlement geopackage, and outputs
vect_in_dir = r'vect_inputs'
vect_out_dir = r'vect_out'

Projections

In [3]:
# EPSG code of the working CRS (EPSG:32642, WGS 84 / UTM zone 42N, metres).
# Passed to to_crs() / set_crs() below; buffer distances are expressed in its units.
dest_crs = 32642

#### Read in files

KP boundaries

In [4]:
# Admin-1 boundaries (OCHA). The path is built from geo_dir, like the raster cells,
# so the drive root only has to be changed in the directory-config cell.
kp = gpd.read_file(os.path.join(geo_dir, 'Boundaries', 'OCHA', 'pak_admbnda_adm1_ocha_pco_gaul_20181218.shp'))

In [5]:
# Keep only the Khyber Pakhtunkhwa record, then project it into the working CRS
is_kp = kp['ADM1_EN'] == 'Khyber Pakhtunkhwa'
kp = kp.loc[is_kp].to_crs(dest_crs)

In [6]:
# Grow the KP polygon by 20km so that nearby markets and roads that may be used are included.
# NOTE: kp is replaced by its own buffered version, so re-running this cell buffers again.
kp_buffered_geom = kp.geometry.buffer(20000)
kp = kp.set_geometry(kp_buffered_geom)
# kp = kp.to_crs(4326) # change back for clipping of 4326 rasters

In [7]:
# Preview the KP boundary record after filtering, reprojection and buffering
kp.head()

Unnamed: 0,Shape_Leng,Shape_Area,ADM1_EN,ADM1_PCODE,ADM1_REF,ADM1ALT1EN,ADM1ALT2EN,ADM0_EN,ADM0_PCODE,date,validOn,validTo,geometry
2,27.479368,9.901186,Khyber Pakhtunkhwa,PK2,,,,Pakistan,PK,2018-11-30,2018-12-18,,"POLYGON ((505910.537 3561734.747, 505840.700 3..."


Settlement catchments (from ArcGIS Distance Allocation)

In [8]:
# Catchment shapefiles to process. os.listdir() returns entries in arbitrary order,
# so sort them to keep the processing order (and the printed progress) reproducible.
shp_list = sorted(f for f in os.listdir(os.path.join(data_dir, vect_in_dir)) if f.endswith('.shp'))

In [9]:
# Show the catchment shapefiles that the loops below will iterate over
shp_list

['settle100_30min_catch_DistAlloc_Poly.shp',
 'settle200_30min_catch_DistAlloc_Poly.shp']

HRSL Population raster

In [21]:
hrsl_path = os.path.join(geo_dir, r'Population/HRSL/KP_general_v15_32642.tif')

with rasterio.open(hrsl_path, 'r') as hrsl_raw:
    # Clip band 1 to the buffered KP polygon; crop=True trims the array to the
    # polygon's extent and returns the matching transform.
    hrsl, hrsl_tform = mask(hrsl_raw, kp.geometry, crop=True, indexes=1)

    # Update the profile according to the new clipped raster. Cropping changes the
    # array dimensions as well as the transform, and only one band was read, so all
    # of these must be refreshed for the profile to describe `hrsl`.
    hrsl_prof = hrsl_raw.profile
    hrsl_prof.update(
        transform=hrsl_tform,
        height=hrsl.shape[-2],
        width=hrsl.shape[-1],
        count=1,
    )

FAO Landcover layers

In [11]:
fao_path = os.path.join(geo_dir, r'Landcover/FAO/kp_lc_v1_AG_cl_32642.tif')

with rasterio.open(fao_path, 'r') as fao_raw:
    # Clip band 1 of the landcover raster to the buffered KP polygon
    fao_mast, fao_tform = mask(fao_raw, kp.geometry, crop=True, indexes=1)
    fao_mast = fao_mast.astype(np.uint8)

    # Load the source profile and make it describe the clipped, single-band array:
    # cropping changes height/width as well as the transform.
    # (dtype and nodata are left as recorded in the source file.)
    fao_prof = fao_raw.profile
    fao_prof.update(
        transform=fao_tform,
        height=fao_mast.shape[-2],
        width=fao_mast.shape[-1],
        count=1,
    )

    # Pixel area in square kilometers. The transform's `e` term (pixel height) is
    # typically negative for north-up rasters, so take the absolute value to keep
    # the area positive.
    pix_area = abs(fao_tform.a * fao_tform.e) / 1000000

In [12]:
# One 0/1 uint8 mask per cropping type, keyed on the landcover class code:
# 1 where the pixel carries that code, 0 everywhere else
orchard = (fao_mast == 3).astype(np.uint8)
irrig = (fao_mast == 4).astype(np.uint8)
non_irrig = (fao_mast == 5).astype(np.uint8)

In [13]:
# Pixel area in square kilometers (same computation as in the FAO landcover cell).
# abs() keeps the area positive: the transform's `e` term is typically negative
# for north-up rasters.
pix_area = abs(fao_tform.a * fao_tform.e) / 1000000

### Transform catchments

Load in the catchments, do some minor cleaning, join them to the settlement layers, and aggregate the population and agricultural area within them. Then export the result to a GeoPackage.

In [None]:
def _zonal_sum(zones, raster, affine):
    """Sum the pixels of `raster` that fall inside each feature of `zones`.

    Parameters
    ----------
    zones : GeoDataFrame whose geometries define the zones.
    raster : 2D numpy array to summarise.
    affine : affine transform that georeferences `raster`.

    Returns
    -------
    numpy.ndarray of float64, one value per feature in the order of `zones`.
    Features for which zonal_stats returns None for the sum become NaN.
    """
    stats = zonal_stats(zones, raster, affine=affine, nodata=np.nan, stats=['sum'])
    # Build a positional float array so the result can be assigned to a column
    # without relying on index alignment, and so None is coerced to NaN.
    return pd.Series([s['sum'] for s in stats], dtype='float64').to_numpy()


# Loop-invariant paths
settlement_gpkg = os.path.join(data_dir, vect_in_dir, "KP_NGA_Settlements_200mBuff_Filtered.gpkg")
catchments_gpkg = os.path.join(data_dir, vect_out_dir, 'Settlement_catchments.gpkg')

for shp in shp_list:

    # read in, slim down
    layer = gpd.read_file(os.path.join(data_dir, vect_in_dir, shp))
    layer = layer[['Id', 'gridcode', 'geometry']]

    # define name variables from the file name, e.g. 'settle100_30min_...' -> size '100', catchtime '30min'
    catchtime = re.findall(r'settle[0-9]+_(.*?)_', os.path.basename(shp))[0]
    size = re.findall(r'settle(.*?)_', os.path.basename(shp))[0]
    full = catchtime + '_' + size

    print(full)

    # settlement attribute table for this size class (geometry is not needed for the join).
    # FID_Settle stays a regular column because the merge below joins on it via right_on.
    settlement = gpd.read_file(settlement_gpkg, layer=f'{size}plus', driver="GPKG")
    settlement = pd.DataFrame(settlement).drop(columns='geometry')

    # dissolve to one (multi)polygon per gridcode, then buffer out by 100 and back in
    # by 100 (CRS units) with mitred joins to clean up artefacts in the catchment outlines
    layer = layer.dissolve(by="gridcode")
    layer = layer.buffer(100, 1, join_style=JOIN_STYLE.mitre).buffer(-100, 1, join_style=JOIN_STYLE.mitre)

    # clean up dissolved polygon: buffer() returns a GeoSeries, so rebuild a GeoDataFrame
    # with a proper 'geometry' column and gridcode restored from the index
    layer = gpd.GeoDataFrame(layer)
    layer = layer.rename(columns={0: 'geometry'}).set_geometry('geometry').reset_index()
    layer = layer.set_crs(dest_crs)  # label the projection (layer is currently naive)
#     layer = layer.to_crs(4326) # change to 4326 for ZS operations with WGS84 rasters

    # A second step joins the attributes of the settlement layer to the catchments for that layer for later analysis.
    # This is necessary if your catchment prep process drops all attribute information, as in ArcGIS.
    layer = pd.merge(layer, settlement, how='left', left_on="gridcode", right_on="FID_Settle")

    # add HRSL population via Zonal Statistics
    layer['hrsl_pop'] = _zonal_sum(layer, hrsl, hrsl_tform)

    # summarize square kilometers of agricultural land of each type within catchment:
    # pixel count of each 0/1 mask multiplied by the pixel area in square kilometers.
    # np.abs guards against a negative pix_area (negative pixel height in the transform).
    layer['orch_sqkm'] = np.abs(_zonal_sum(layer, orchard, fao_tform) * pix_area)
    layer['irrig_sqkm'] = np.abs(_zonal_sum(layer, irrig, fao_tform) * pix_area)
    layer['non_irrig_sqkm'] = np.abs(_zonal_sum(layer, non_irrig, fao_tform) * pix_area)

    # export: one layer per input shapefile, all in the same geopackage
    layer.to_file(catchments_gpkg, layer=f'settle_{size}_{catchtime}_catchments', driver="GPKG")

30min_100


## Table operations

In [19]:
# All catchment layers live in the same geopackage
catchments_path = os.path.join(data_dir, vect_out_dir, f'Settlement_catchments.gpkg')
crop_types = ('orch', 'irrig', 'non_irrig')

for shp in shp_list:

    # Recover the catchment time and settlement size tokens from the file name
    shp_name = os.path.basename(shp)
    catchtime = re.findall(r'settle[0-9]+_(.*?)_', shp_name)[0]
    size = re.findall(r'settle(.*?)_', shp_name)[0]
    full = f'{catchtime}_{size}'

    print(full)

    # Load the catchment layer written by the previous step
    layer_name = f'settle_{size}_{catchtime}_catchments'
    layer = gpd.read_file(catchments_path, layer=layer_name, driver="GPKG")

    # Catchment area in square kilometers
    layer['area_sqkm'] = layer.geometry.area / 1000000

    # Make sure the agricultural areas are non-negative ...
    for crop in crop_types:
        layer[f'{crop}_sqkm'] = np.abs(layer[f'{crop}_sqkm'])

    # ... then express each one as a percentage of the catchment area
    for crop in crop_types:
        layer[f'{crop}_pct'] = (layer[f'{crop}_sqkm'] / layer['area_sqkm']) * 100

    # Overwrite the projected layer and write a WGS84 (EPSG:4326) copy alongside it
    layer.to_file(catchments_path, layer=layer_name, driver="GPKG")
    layer.to_crs(4326).to_file(catchments_path, layer=f'{layer_name}_4326', driver="GPKG")

30min_100
30min_200
