## Aligning rasters: A step-by-step breakdown

This notebook aligns input rasters with a base reference raster. The implict purpose, reflected in the datasets used here, is to align rasters so that raster math operations can be performed between the rasters

In [3]:
import os, sys
import re
import pprint
# from pprint import pprint

import numpy as np

import rasterio
from rasterio import features, transform
from rasterio.mask import mask
from rasterio.transform import Affine
from rasterio.warp import calculate_default_transform, reproject, Resampling

import pandas as pd
import geopandas as gpd

import shapely
from shapely.geometry import shape, box, Polygon

File paths

In [4]:
geo_dir = r'P:\PAK\GEO'
pop_dir = r'Population\HRSL'

data_dir = r'../../data'
acc_dir = r'access'
rast_dir = r'rast_inputs'


Projections

In [5]:
dest_crs = 'EPSG:32642'
dcrs_int = re.findall('[0-9]+',dest_crs)[0]

Useful functions

In [6]:
# Lightly adapted from https://gis.stackexchange.com/questions/290030/what-does-it-mean-to-reproject-a-satellite-image-from-utm-zone-13n-to-wgs84

def reproject_tif(source_file, destination_file,dest_crs):
    """Re-projects tif at source file to destination CRS at destination file.

    Args:
        source_file: file to re-project
        destination_file: file to store re-projection

    Returns:
        destination_file: where the re-projected file is saved at
    """

    with rasterio.open(source_file) as src:
        dst_crs = dest_crs
        transform, width, height = calculate_default_transform(
            src.crs,
            dst_crs,
            src.width,
            src.height,
            *src.bounds
        )

        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height,
            "compress":'LZW'
        })

        with rasterio.open(destination_file, 'w', **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=dst_crs,
                    resampling=Resampling.nearest,
                    num_threads=-1
                )

        return destination_file

### Load in rasters

Load in the base raster we are using as a template so we can match up exactly to its grid and cell size

In [9]:
base_rast_pth = os.path.join(data_dir,acc_dir,r'current/211021/Current_dry_District_HQs.tif')

In [21]:
with rasterio.open(base_rast_pth, 'r') as base_src:
    base_rast = base_src.read(1)
    base_idx = base_src.index
    base_profile = base_src.meta.copy()
    base_tform = base_src.transform   

In [11]:
# Generate a bounding box from the extent of the base raster

bds = rasterio.warp.array_bounds(base.shape[0],base.shape[1],base_tform)
base_bbox = box(bds[0],bds[1],bds[2],bds[3])

Load in raster to transform

In [12]:
trast_pth = os.path.join(geo_dir,pop_dir,r'kp_general_v15_32642.tif')

In [17]:
with rasterio.open(trast_pth, 'r') as trast_src:
    
    trast_in = trast_src.read(1)
    trast_profile = trast_src.profile

In [37]:
base_profile

{'driver': 'GTiff',
 'dtype': 'float32',
 'nodata': -99999.0,
 'width': 17193,
 'height': 21136,
 'count': 1,
 'crs': CRS.from_epsg(32642),
 'transform': Affine(28.23254382673943, 0.0, 502425.3974356071,
        0.0, -31.766168813716423, 4114847.74964671)}

In [19]:
trast_profile

{'driver': 'GTiff', 'dtype': 'float64', 'nodata': nan, 'width': 16041, 'height': 23286, 'count': 1, 'crs': CRS.from_epsg(32642), 'transform': Affine(27.765518519322814, 0.0, 522444.34588665137,
       0.0, -27.765044582909745, 4094862.6691058525), 'tiled': False, 'compress': 'zstd', 'interleave': 'band'}

### Reproject / transform and export

In [84]:
# create a blank array of the correct dimensions to populate while reprojecting
trast_transformed = np.zeros(base_rast.shape, np.float32)

# reproject + transform

with rasterio.Env():
    reproject(
        trast_in,
        trast_transformed,
        src_transform=trast_profile['transform'],
        src_crs=trast_profile['crs'],
        dst_transform=base_profile['transform'],
        dst_crs=base_profile['crs'],
        resampling=Resampling.med) # change to your preferred resampling methodology
    
# set to float32 to reduce size
trast_transformed = trast_transformed.astype(np.float32)

Check out resulting data

In [86]:
trast_transformed.shape

(21136, 17193)

In [87]:
np.nansum(trast_in)

36735331.69212351

In [90]:
# eliminate 0s -- this is specific to HRSL data
trast_transformed = np.where(trast_transformed == 0, np.nan, trast_transformed)

In [97]:
np.nansum(trast_transformed)

32406410.0

In [94]:
trast_transformed

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=float32)

Export

In [91]:
export_profile = base_profile.copy()
export_profile.update({
    "dtype":'float32',
    "height": trast_transformed.shape[0],
    "width": trast_transformed.shape[1],
    "transform": base_profile['transform'],
    "nodata" : np.nan,
    "compress":'ZSTD'})

In [95]:
# export aligned raster

with rasterio.open(os.path.join(data_dir,f'kp_general_v15_32642_aligned_med.tif'),'w',**export_profile) as dst:
    dst.write(trast_transformed,indexes=1)