## Aligning rasters: A step-by-step breakdown

This notebook aligns input rasters with a base reference raster. The implict purpose, reflected in the datasets used here, is to align rasters so that raster math operations can be performed between the rasters

In [7]:
import os, sys
import re
import pprint
# from pprint import pprint

import numpy as np

import rasterio
from rasterio import features, transform
from rasterio.mask import mask
from rasterio.transform import Affine
from rasterio.warp import calculate_default_transform, reproject, Resampling

import pandas as pd
import geopandas as gpd

import shapely
from shapely.geometry import shape, box, Polygon

### Setup

Directories

In [8]:
geo_dir = r'P:\PAK\GEO'
data_dir = r'../../data'

rast_dir = r'rast_inputs'
vect_in_dir = r'vect_inputs'
vect_out_dir = r'vect_out'

rds_dir = r'roads'
dest_dir = r'destinations'
speed_dir = r'speed'
fric_dir = r'friction'
acc_dir = r'access'

Projections

In [9]:
dest_crs = 'EPSG:32642'
dcrs_int = int(re.findall('[0-9]+',dest_crs)[0])

### Load and process raster to points

Load in the base raster we are using as a template so we can match up exactly to its grid and cell size

In [10]:
rast_pth = os.path.join(geo_dir,r'Population/HRSL/kp_general_v15.tif')

In [11]:
import rasterio
from rasterio import features

In [12]:
with rasterio.open(rast_pth, 'r') as src1:
    
    rast = src1.read(1).astype(np.float32)
    
    # populate geoms list

    results = (
        {'properties': {'POP': v}, 'geometry': s}
        for i, (s, v) 
        in enumerate(
            rasterio.features.shapes(rast, transform=src1.transform)))

    geoms = list(results)

    # convert to GDF, clean up, and dissolve

    poly = gpd.GeoDataFrame.from_features(geoms)
    pts = poly.copy()
    pts.geometry = pts.geometry.centroid
    

KeyboardInterrupt: 

In [17]:
pts.dtypes

Unnamed: 0      int64
POP           float64
lon_4326      float64
lat_4326      float64
lon_32642     float64
lat_32642     float64
dtype: object

#### Set up dask cluster (if this is a lot points)

In [19]:

import dask
import coiled
from dask.distributed import Client, LocalCluster, Lock
from dask.utils import SerializableLock
import dask.dataframe as dd

from dask_control import *

In [20]:
client=get_dask_client(cluster_type='local',n_workers=2,processes=True,threads_per_worker=4)
client

0,1
Connection method: Direct,
Dashboard: http://127.0.0.1:8787/status,

0,1
Comm: tcp://127.0.0.1:8786,Workers: 2
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: 28 minutes ago,Total memory: 31.98 GiB

0,1
Comm: tcp://10.175.66.81:59264,Total threads: 4
Dashboard: http://10.175.66.81:59265/status,Memory: 15.99 GiB
Nanny: tcp://127.0.0.1:62400,
Local directory: P:\PAK\Code\kpgit\kpgit\notebooks\MP\dask-worker-space\worker-pk7ro2nm,Local directory: P:\PAK\Code\kpgit\kpgit\notebooks\MP\dask-worker-space\worker-pk7ro2nm
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 0.0%,Last seen: Just now
Memory usage: 42.32 MiB,Spilled bytes: 0 B
Read bytes: 0.0 B,Write bytes: 2.66 MiB

0,1
Comm: tcp://10.175.66.81:59261,Total threads: 4
Dashboard: http://10.175.66.81:59262/status,Memory: 15.99 GiB
Nanny: tcp://127.0.0.1:62399,
Local directory: P:\PAK\Code\kpgit\kpgit\notebooks\MP\dask-worker-space\worker-ifncip11,Local directory: P:\PAK\Code\kpgit\kpgit\notebooks\MP\dask-worker-space\worker-ifncip11
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 0.0%,Last seen: Just now
Memory usage: 42.24 MiB,Spilled bytes: 0 B
Read bytes: 19.78 MiB,Write bytes: 1.79 MiB


#### Load in points data

In [42]:
# Load in points if needing to redo for some reason
pts = dd.read_csv(os.path.join(geo_dir,'Population/HRSL/pak_general_v15_pts.csv'),
                     na_values = ' ',
                     blocksize='100mb'
                )

In [43]:
# pts = gpd.GeoDataFrame(pts, geometry = gpd.points_from_xy(x=pts.lon_4326,y=pts.lat_4326)).set_crs("EPSG:4326")

#### Clip to desired extent

Load in KP as clipping object

In [46]:
kp = gpd.read_file(os.path.join(geo_dir,'Boundaries/OCHA/pak_admbnda_adm1_ocha_pco_gaul_20181218.shp'))
kp = kp[kp['ADM1_EN'] == 'Khyber Pakhtunkhwa']
kp = kp.to_crs(dest_crs)

# Buffer the polygon by 20km so we take in nearby markets and roads that may be used
# kp.geometry = kp.buffer(20000)

In [47]:
kp = kp.to_crs(4326)

In [48]:
# pts = pts.to_crs(4326)
# pts['lon_4326'] = pts.geometry.x
# pts['lat_4326'] = pts.geometry.y

In [49]:
# pts = pts.to_crs(32642)
# pts['lon_32642'] = pts.geometry.x
# pts['lat_32642'] = pts.geometry.y

In [50]:
# kp_pts = gpd.clip(pts,kp)

In [51]:
def clip_pts(df, polys):
    # Join using 4326
    # Convert to GDF
    if isinstance(polys, gpd.GeoDataFrame) == False:
        polys = polys.result()
    gdf = gpd.GeoDataFrame(
        df, 
        geometry=gpd.points_from_xy(df.lon_4326, df.lat_4326)
    ).set_crs("EPSG:4326")
    
    # Clip by extent
    gdf = gpd.clip(gdf, polys)
    
    df = pd.DataFrame(gdf.drop('geometry', axis=1))
    
    
    return df
    

In [52]:
# Broadcast adm3
kp_dist = client.scatter(kp, broadcast=True)

In [55]:
# Distributed clip
kp_pts = pts.map_partitions(clip_pts, kp_dist)

In [58]:
len(kp_pts)

1347614

In [63]:
kp_pts

Unnamed: 0_level_0,Unnamed: 0,POP,lon_4326,lat_4326,lon_32642,lat_32642
npartitions=9,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
,int64,float64,float64,float64,float64,float64
,...,...,...,...,...,...
...,...,...,...,...,...,...
,...,...,...,...,...,...
,...,...,...,...,...,...


In [64]:
kp_pts.dtypes

Unnamed: 0      int64
POP           float64
lon_4326      float64
lat_4326      float64
lon_32642     float64
lat_32642     float64
dtype: object

In [62]:
kp_pts.head()

Unnamed: 0.1,Unnamed: 0,POP,lon_4326,lat_4326,lon_32642,lat_32642


#### Export

In [59]:
# pts.drop('geometry',axis=1).to_csv(os.path.join(geo_dir,'Population/HRSL/pak_general_v15_pts.csv'))
kp_pts.drop('geometry',axis=1).to_csv(os.path.join(geo_dir,'Population/HRSL/kp_general_v15_pts.csv'))

NotADirectoryError: [WinError 267] The directory name is invalid: 'P:/PAK/GEO/Population/HRSL/kp_general_v15_pts.csv'

In [60]:
pts.to_crs(4326).to_file(os.path.join(geo_dir,'Population/HRSL/pak_general_v15_pts.gpkg'),layer="pak_general_v15_4326",driver='GPKG')

In [59]:
pts.to_crs(dcrs_int).to_file(os.path.join(geo_dir,'Population/HRSL/pak_general_v15_pts.gpkg'),layer=f"pak_general_v15_{dcrs_int}",driver='GPKG')

In [62]:
kp_pts.to_crs(4326).to_file(os.path.join(geo_dir,'Population/HRSL/kp_hrsl_v15_pts.gpkg'),layer="kp_general_v15_4326",driver='GPKG')

In [61]:
kp_pts.to_crs(dcrs_int).to_file(os.path.join(geo_dir,'Population/HRSL/kp_hrsl_v15_pts.gpkg'),layer=f"kp_general_v15_{dcrs_int}",driver='GPKG')