Script to merge Age Data from the WorldPop Database

This script is meant to prepare the demographic data (Age, Population density) from the WorldPop Databank (https://hub.worldpop.org) for the further analysis with the Floods and Health Tool. 

This script merges the  input files (tif) into one file (netcdf) containing  bands: band 1 Children counts, band 2 Adult counts, band 3 Population density. The population density data is resampled to the smaller pixel size of the age data and the interpolation method used is Neares Neighbour. 




Preparation Steps for the Script: 

1. Download Age Data of the country of interest from https://hub.worldpop.org/geodata/listing?id=88 
--> These are about 36 files
2. Make two different folders for Children (0-10 years, 8 files) and Adults (>10 years, 28 files)
3. Download the Population density map of the country of interest from https://hub.worldpop.org/geodata/listing?id=76 

You should have two input folders (1. adults and 2. children) containing the different tif files of adult and children population and one single file with the overall population density. All files are in TIF format and will be converted to NETCDF ultimately.

Import Libraries

In [37]:
### Import Libraries ###

import os
import numpy as np
from osgeo import gdal
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib as mpl
import rasterio
import xarray as xr
import rasterio
from rasterio.enums import Resampling
import gdal


Load input data for Adults and Children from folder and combine tiff files to one tiff file with 2 bands (Band 1 = Children, Band 2 = Adults)

In [23]:

# Directory with input data
mapsDir = '../examples/'
# output directory
ds_out = '/output/preprocess/' #make new output folder for pre-processing?

# Input folders containing raster files for children and adults
input_folder_children = mapsDir+'Children_raw' #'C:/Users/bleser/OneDrive - Stichting Deltares/Desktop/Floods&Health Quantification/Data bronnen/tif_raw_moz/Children_raw'
input_folder_adults = mapsDir+'Adults_raw'#'C:/Users/bleser/OneDrive - Stichting Deltares/Desktop/Floods&Health Quantification/Data bronnen/tif_raw_moz/Adults_raw'

# Output folder
#output_folder = r'C:\Users\bleser\OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen\tif_raw_moz\Output'

# Output raster file
Ch_ad_combined_tif = ds_out+'children_adults_combined.tif' r'C:\Users\bleser\OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen\tif_raw_moz\Output\children_and_adults_combined.tif'

# Get a list of raster files for children and adults
raster_files_children = [f for f in os.listdir(input_folder_children) if f.endswith('.tif')]
raster_files_adults = [f for f in os.listdir(input_folder_adults) if f.endswith('.tif')]

if len(raster_files_children) == 0 or len(raster_files_adults) == 0:
    raise Exception("No raster files found in the input folders.")

# Open the first raster file to get information for the output file
first_raster_path = os.path.join(input_folder_children, raster_files_children[0])
first_raster = gdal.Open(first_raster_path)

if first_raster is None:
    raise Exception(f"Failed to open raster file: {first_raster_path}")

driver = gdal.GetDriverByName('GTiff')
rows, cols, bands = first_raster.RasterYSize, first_raster.RasterXSize, 2  # Two bands for children and adults: Band 1 = children, Band 2 = adults
geotransform = first_raster.GetGeoTransform()
projection = first_raster.GetProjection()

# Create arrays to accumulate the combined raster data for children and adults
combined_data_children = np.zeros((rows, cols), dtype=np.float32)
combined_data_adults = np.zeros((rows, cols), dtype=np.float32)

# Loop through each input raster file for children
for file_name in raster_files_children:
    file_path = os.path.join(input_folder_children, file_name)
    input_raster = gdal.Open(file_path)
    
    # Read the raster data
    raster_data = input_raster.ReadAsArray()
    
    # Set negative values to zero
    raster_data[raster_data < 0] = 0
    
    # Accumulate the raster data for children
    combined_data_children += raster_data.astype(np.float32)
    
    input_raster = None  # Close the input raster file

# Loop through each input raster file for adults
for file_name in raster_files_adults:
    file_path = os.path.join(input_folder_adults, file_name)
    input_raster = gdal.Open(file_path)
    
    # Read the raster data
    raster_data = input_raster.ReadAsArray()
    
    # Set negative values to zero
    raster_data[raster_data < 0] = 0
    
    # Accumulate the raster data for adults
    combined_data_adults += raster_data.astype(np.float32)
    
    input_raster = None  # Close the input raster file

# Create the output folder if it doesn't exist
os.makedirs(ds_out, exist_ok=True)

# Create the output raster file
output_path = os.path.join(ds_out, Ch_ad_combined_tif)
output_raster = driver.Create(output_path, cols, rows, bands, gdal.GDT_Float32)
output_raster.SetGeoTransform(geotransform)
output_raster.SetProjection(projection)

# Write the combined data to the output file
output_raster.GetRasterBand(1).WriteArray(combined_data_children)
output_raster.GetRasterBand(2).WriteArray(combined_data_adults)

# Close the output raster file and save it
output_raster = None

print("Combined raster file saved to:", output_path)

Combined raster file saved to: C:\Users\bleser\OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen\tif_raw_moz\Output\children_and_adults_combined.tif


Convert Combined Children and adults to netCDF

In [24]:
# Path to the output NetCDF file
Ch_ad_combined_nc = ds_out+'combined_ch_ad.nc'

# Open the GeoTIFF file
with rasterio.open(Ch_ad_combined_tif) as src:
    # Read the data and metadata
    data = src.read()
    profile = src.profile
    crs = src.crs

    # Create xarray Dataset
    ds = xr.Dataset(
        {
            'band_1': (('y', 'x'), data[0]),
            'band_2': (('y', 'x'), data[1]),
        },
        coords={'x': src.bounds.left + src.res[0] * (0.5 + np.arange(src.width)),
                'y': src.bounds.top - src.res[1] * (0.5 + np.arange(src.height))},
        attrs={'crs': crs.to_string()},
    )

    # Save the Dataset to a NetCDF file
    ds.to_netcdf(Ch_ad_combined_nc)

print("Conversion completed successfully.")

Conversion completed successfully.


Pre-process PopDens Data

Resampling with gdal, making it the same pixel size as the age data. Resampling method: Nearest Neigbour

In [60]:
# Input and output file paths
input_popdens = mapsDir+ 'moz_pd_2020.tif'
output_popdens_tif = ds_out+'popd_raster_fine.tif'

# Open the input dataset
src_ds = gdal.Open(input_popdens)

# Get the input geotransform and projection
geotransform = src_ds.GetGeoTransform()
projection = src_ds.GetProjection()

# Specify the metadata values
new_min_x = 30.2179165609999991
new_max_x = 40.8395831851799969
new_min_y = -26.8687496907399996
new_max_y = -10.4704164229999996
new_width = 12746
new_height = 19678
new_pixel_width = 0.0008333333299999999945
new_pixel_height = -0.0008333333299999999945

# Create the output dataset
drv = gdal.GetDriverByName('GTiff')
dst_ds = drv.Create(output_popdens_tif, new_width, new_height, src_ds.RasterCount, gdal.GDT_Float32)

# Calculate the geotransform for the output dataset
new_geotransform = (new_min_x, new_pixel_width, 0, new_max_y, 0, new_pixel_height)

# Set the geotransform and projection for the output dataset
dst_ds.SetGeoTransform(new_geotransform)
dst_ds.SetProjection(projection)

# Perform the resampling with nearest neighbor
resample_method = gdal.GRA_NearestNeighbour
gdal.ReprojectImage(src_ds, dst_ds, None, None, resample_method)

# Close the datasets
src_ds = None
dst_ds = None

Convert pop_dens data to netcdf

In [100]:

# Path to the output NetCDF file
output_netcdf_popdens = ds_out+'popdens.nc'

# Open the GeoTIFF file
with rasterio.open(output_popdens_tif) as src:
    # Read the data and metadata
    data = src.read()
    profile = src.profile
    crs = src.crs

    # Create xarray Dataset
    ds = xr.Dataset(
        {
            'band_1': (('y', 'x'), data[0],)
        },
        coords={'x': src.bounds.left + src.res[0] * (0.5 + np.arange(src.width)),
                'y': src.bounds.top - src.res[1] * (0.5 + np.arange(src.height))},
        attrs={'crs': crs.to_string()},
    )

    # Save the Dataset to a NetCDF file
    ds.to_netcdf(output_netcdf_popdens)

print("Conversion completed successfully.")

Conversion completed successfully.


Merge previous two NetCDF files into one (Children + Adults + PopDens)

In [103]:
# Path to the output NetCDF file
output_netcdf = ds_out+'merged_ch_ad_popdens.nc'

# Open the input NetCDF files
ds1 = xr.open_dataset(Ch_ad_combined_nc)
ds2 = xr.open_dataset(output_netcdf_popdens)

# Merge the datasets
merged_ds = xr.merge([ds1, ds2])

# Save the merged dataset to a NetCDF file
merged_ds.to_netcdf(output_netcdf)

print("Merge completed successfully.")

Merge completed successfully.
