Script to merge Age Data from the WorldPop Database

Preparation Steps for the Script: 

1. Download Age Data of the country of interest from https://hub.worldpop.org/geodata/listing?id=88 
--> These are about 36 files
2. Make two different folders for Children (0-10 years, 8 files) and Adults (>10 years, 28 files)
3. Download the Population density map of the country of interest from https://hub.worldpop.org/geodata/listing?id=76 

You should have two input folders (1. adults and 2. children) containing the different tif files of adult and children population and one single file with the overall population density. All files are in TIF format and will be converted to NETCDF ultimately.

Import Libraries

In [93]:
### Import Libraries ###

import os
import numpy as np
from osgeo import gdal
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib as mpl
import rasterio
import xarray as xr
import rasterio
from rasterio.enums import Resampling

Load the input data for adults and children from their folders and combine the TIFF files into one TIFF file with 2 bands (Band 1 = Children, Band 2 = Adults)

In [94]:
# Sum the per-age-group WorldPop rasters of each folder into one layer and
# write both layers to a two-band GeoTIFF (Band 1 = children, Band 2 = adults).

# Input folders containing raster files for children and adults
input_folder_children = 'C:/Users/bleser/OneDrive - Stichting Deltares/Desktop/Floods&Health Quantification/Data bronnen/tif_raw_moz/Children_raw'
input_folder_adults = 'C:/Users/bleser/OneDrive - Stichting Deltares/Desktop/Floods&Health Quantification/Data bronnen/tif_raw_moz/Adults_raw'

# Output folder
output_folder = r'C:\Users\bleser\OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen\tif_raw_moz\Output'

# Output raster file
output_file = r'C:\Users\bleser\OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen\tif_raw_moz\Output\children_and_adults_combined.tif'


def _list_tif_files(folder):
    """Return the sorted names of all '.tif' files found in *folder*."""
    # Sorting makes the choice of the reference raster (first file) reproducible;
    # os.listdir returns entries in arbitrary order.
    return sorted(f for f in os.listdir(folder) if f.endswith('.tif'))


def _sum_rasters(folder, file_names, shape):
    """Return the float32 sum of all rasters in *file_names*, with negatives set to 0.

    Parameters:
        folder: directory that contains the raster files.
        file_names: names of the raster files inside *folder*.
        shape: expected (rows, cols) of every raster.

    Raises:
        Exception: if a raster cannot be opened or read.
        ValueError: if a raster does not have the expected shape.
    """
    total = np.zeros(shape, dtype=np.float32)
    for file_name in file_names:
        file_path = os.path.join(folder, file_name)
        input_raster = gdal.Open(file_path)
        if input_raster is None:
            raise Exception(f"Failed to open raster file: {file_path}")

        raster_data = input_raster.ReadAsArray()
        input_raster = None  # Close the input raster file

        if raster_data is None:
            raise Exception(f"Failed to read raster file: {file_path}")
        if raster_data.shape != shape:
            raise ValueError(
                f"Raster {file_path} has shape {raster_data.shape}, expected {shape}"
            )

        # Set negative values to zero before accumulating
        raster_data[raster_data < 0] = 0
        total += raster_data.astype(np.float32)
    return total


# Get a list of raster files for children and adults
raster_files_children = _list_tif_files(input_folder_children)
raster_files_adults = _list_tif_files(input_folder_adults)

if not raster_files_children or not raster_files_adults:
    raise Exception("No raster files found in the input folders.")

# Open the first raster file to get information for the output file
first_raster_path = os.path.join(input_folder_children, raster_files_children[0])
first_raster = gdal.Open(first_raster_path)

if first_raster is None:
    raise Exception(f"Failed to open raster file: {first_raster_path}")

driver = gdal.GetDriverByName('GTiff')
rows, cols, bands = first_raster.RasterYSize, first_raster.RasterXSize, 2  # Two bands for children and adults: Band 1 = children, Band 2 = adults
geotransform = first_raster.GetGeoTransform()
projection = first_raster.GetProjection()
first_raster = None  # Close the reference raster; only its metadata is needed

# Accumulate the combined raster data for children and adults
combined_data_children = _sum_rasters(input_folder_children, raster_files_children, (rows, cols))
combined_data_adults = _sum_rasters(input_folder_adults, raster_files_adults, (rows, cols))

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Create the output raster file.
# output_file is already an absolute path, so os.path.join returns it unchanged.
output_path = os.path.join(output_folder, output_file)
output_raster = driver.Create(output_path, cols, rows, bands, gdal.GDT_Float32)
if output_raster is None:
    raise Exception(f"Failed to create raster file: {output_path}")
output_raster.SetGeoTransform(geotransform)
output_raster.SetProjection(projection)

# Write the combined data to the output file
output_raster.GetRasterBand(1).WriteArray(combined_data_children)
output_raster.GetRasterBand(2).WriteArray(combined_data_adults)

# Flush, close the output raster file and save it
output_raster.FlushCache()
output_raster = None

print("Combined raster file saved to:", output_path)

Combined raster file saved to: C:\Users\bleser\OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen\tif_raw_moz\Output\children_and_adults_combined.tif


Convert Combined Children and adults to netCDF

In [99]:
# Convert the two-band GeoTIFF (children / adults) to a NetCDF file with one
# variable per band ('band_1', 'band_2', ...) on cell-centre x/y coordinates.

# Path to the input GeoTIFF file with 2 bands.
# Raw strings: the paths mix '/' and '\', and '\D' / '\F' are invalid escape
# sequences in a normal string literal. The resulting path text is unchanged.
input_tiff = r'C:/Users/bleser/OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen/tif_raw_moz/Output/children_and_adults_combined.tif'

# Path to the output NetCDF file
output_netcdf = r'C:/Users/bleser/OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen/tif_raw_moz/Output/combined_ch_ad.nc'

# Open the GeoTIFF file
with rasterio.open(input_tiff) as src:
    # Read the data and metadata
    data = src.read()
    profile = src.profile
    crs = src.crs

    # Cell-centre coordinates derived from the raster bounds and resolution
    x_coords = src.bounds.left + src.res[0] * (0.5 + np.arange(src.width))
    y_coords = src.bounds.top - src.res[1] * (0.5 + np.arange(src.height))

    # Create xarray Dataset with one variable per band found in the file
    ds = xr.Dataset(
        {f'band_{i}': (('y', 'x'), band) for i, band in enumerate(data, start=1)},
        coords={'x': x_coords, 'y': y_coords},
        # A GeoTIFF without CRS yields crs=None; skip the attribute instead of failing
        attrs={'crs': crs.to_string()} if crs is not None else {},
    )

    # Save the Dataset to a NetCDF file
    ds.to_netcdf(output_netcdf)

print("Conversion completed successfully.")

Conversion completed successfully.


Convert pop_dens data to netcdf

In [100]:
# Convert the population density GeoTIFF to a NetCDF file with one variable
# per band ('band_1', ...) on cell-centre x/y coordinates.

# Path to the input GeoTIFF file (population density; described elsewhere in
# this notebook as a single-band file)
input_tiff = r'C:\Users\bleser\OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen\tif_raw_moz\moz_pd_2020.tif'

# Path to the output NetCDF file.
# Raw string: the path mixes '/' and '\', and '\D' / '\F' are invalid escape
# sequences in a normal string literal. The resulting path text is unchanged.
output_netcdf = r'C:/Users/bleser/OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen/tif_raw_moz/Output/popdens.nc'

# Open the GeoTIFF file
with rasterio.open(input_tiff) as src:
    # Read the data and metadata
    data = src.read()
    profile = src.profile
    crs = src.crs

    # Cell-centre coordinates derived from the raster bounds and resolution
    x_coords = src.bounds.left + src.res[0] * (0.5 + np.arange(src.width))
    y_coords = src.bounds.top - src.res[1] * (0.5 + np.arange(src.height))

    # Create xarray Dataset with one variable per band found in the file
    ds = xr.Dataset(
        {f'band_{i}': (('y', 'x'), band) for i, band in enumerate(data, start=1)},
        coords={'x': x_coords, 'y': y_coords},
        # A GeoTIFF without CRS yields crs=None; skip the attribute instead of failing
        attrs={'crs': crs.to_string()} if crs is not None else {},
    )

    # Save the Dataset to a NetCDF file
    ds.to_netcdf(output_netcdf)

print("Conversion completed successfully.")

Conversion completed successfully.


Merge Combined Adult and Children Data with Population density data (TIF)

Problems: The pop_dens data is coarser (larger grid cells, 1 km) --> I tried to resample it to the finer raster (children and adults combined, 100 m), but the numbers per grid cell are still very high. I think there could also be an issue with the ',' and '.' characters (possibly decimal vs. thousands separators).

In [101]:
# Append the population raster as a third band to the two-band children/adults
# GeoTIFF, after resampling it onto the children/adults grid.

from rasterio.warp import reproject

# Paths to input GeoTIFF files
population_file = r'C:\Users\bleser\OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen\tif_raw_moz\moz_pd_2020.tif'  # Population data with one band
combined_data_file = r'C:\Users\bleser\OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen\tif_raw_moz\Output\children_and_adults_combined.tif'  # Combined children and adult data with two bands

# Read the combined children and adult data. Everything that is needed later
# (bands, metadata, grid definition) is captured while the dataset is open,
# so nothing relies on a closed dataset handle.
with rasterio.open(combined_data_file) as src_combined:
    # Read the bands into a list of arrays
    combined_arrays = [src_combined.read(i) for i in src_combined.indexes]
    out_meta = src_combined.meta.copy()
    target_shape = (src_combined.height, src_combined.width)
    target_transform = src_combined.transform
    target_crs = src_combined.crs

# Read the population data and resample it onto the grid of the combined data.
# reproject() uses the georeferencing of both rasters, so cells stay aligned
# even when the two files do not cover exactly the same extent; the source
# transform, CRS and nodata value are taken from the band's dataset.
with rasterio.open(population_file) as src_population:
    resampled_population_array = np.zeros(target_shape, dtype=src_population.dtypes[0])
    reproject(
        source=rasterio.band(src_population, 1),
        destination=resampled_population_array,
        dst_transform=target_transform,
        dst_crs=target_crs,
        resampling=Resampling.bilinear,
    )

# NOTE(review): resampling interpolates values but does not rescale them. If
# the population raster holds a density (e.g. people per km2), band 3 is still
# a density on the finer grid, not a count per cell - confirm the units.

# Combine the arrays from the combined data and resampled population data
merged_arrays = combined_arrays + [resampled_population_array]

# Update metadata from the combined dataset (modify as needed)
out_meta.update({
    "count": len(merged_arrays),  # Set the number of bands
})

# Output file path for the merged GeoTIFF.
# Raw string: '\D' / '\F' are invalid escape sequences in a normal string
# literal. The resulting path text is unchanged.
output_file = r'C:/Users/bleser/OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen/tif_raw_moz/Output/combined_children_adults_popdens.tif'

# Write the merged dataset to a new GeoTIFF file
with rasterio.open(output_file, "w", **out_meta) as dest:
    for i, arr in enumerate(merged_arrays, start=1):
        # All bands of a GeoTIFF share one dtype; match the output profile
        dest.write(arr.astype(out_meta["dtype"], copy=False), i)

print("Merge completed successfully.")

Merge completed successfully.


Convert Merged Tif file to netCDF

In [102]:
# Convert the merged three-band GeoTIFF (children / adults / population) to a
# NetCDF file with one variable per band ('band_1', 'band_2', 'band_3', ...)
# on cell-centre x/y coordinates.

# Path to the input GeoTIFF file with 3 bands.
# Raw strings: the paths mix '/' and '\', and '\D' / '\F' are invalid escape
# sequences in a normal string literal. The resulting path text is unchanged.
input_tiff = r'C:/Users/bleser/OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen/tif_raw_moz/Output/combined_children_adults_popdens.tif'

# Path to the output NetCDF file
output_netcdf = r'C:/Users/bleser/OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen/tif_raw_moz/Output/combined_ch_ad_popdens.nc'

# Open the GeoTIFF file
with rasterio.open(input_tiff) as src:
    # Read the data and metadata
    data = src.read()
    profile = src.profile
    crs = src.crs

    # Cell-centre coordinates derived from the raster bounds and resolution
    x_coords = src.bounds.left + src.res[0] * (0.5 + np.arange(src.width))
    y_coords = src.bounds.top - src.res[1] * (0.5 + np.arange(src.height))

    # Create xarray Dataset with one variable per band found in the file
    ds = xr.Dataset(
        {f'band_{i}': (('y', 'x'), band) for i, band in enumerate(data, start=1)},
        coords={'x': x_coords, 'y': y_coords},
        # A GeoTIFF without CRS yields crs=None; skip the attribute instead of failing
        attrs={'crs': crs.to_string()} if crs is not None else {},
    )

    # Save the Dataset to a NetCDF file
    ds.to_netcdf(output_netcdf)

print("Conversion completed successfully.")

Conversion completed successfully.


Merge previous two NetCDF files into one

In [103]:
# Merge the children/adults NetCDF file and the population density NetCDF file
# into a single NetCDF file.

# Path to the input NetCDF files.
# Raw strings: the paths mix '/' and '\', and '\D' / '\F' are invalid escape
# sequences in a normal string literal. The resulting path text is unchanged.
input_netcdf_1 = r'C:/Users/bleser/OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen/tif_raw_moz/Output/combined_ch_ad.nc'  # NetCDF file with 2 bands
input_netcdf_2 = r'C:/Users/bleser/OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen/tif_raw_moz/Output/popdens.nc'  # NetCDF file with 1 band

# Path to the output NetCDF file
output_netcdf = r'C:/Users/bleser/OneDrive - Stichting Deltares\Desktop\Floods&Health Quantification\Data bronnen/tif_raw_moz/Output/merged_ch_ad_popdens2.nc'

# Open the input NetCDF files. The context managers close them again, so the
# files are not kept locked after this cell has run.
with xr.open_dataset(input_netcdf_1) as ds1, xr.open_dataset(input_netcdf_2) as ds2:
    # Both inputs store their first band as 'band_1'. Merging them unchanged
    # would treat the children band and the population band as one variable,
    # so the population variable is renamed first. 'band_3' matches the band
    # order of the merged GeoTIFF in this notebook (children, adults, population).
    if 'band_1' in ds2.data_vars:
        ds2 = ds2.rename({'band_1': 'band_3'})

    # Merge the datasets.
    # NOTE(review): xr.merge aligns on the union of the x/y coordinates. If the
    # two files use different grids, the result contains NaN where a variable
    # has no value at a coordinate - confirm this is the intended output.
    merged_ds = xr.merge([ds1, ds2])

    # Save the merged dataset to a NetCDF file while the inputs are still open
    merged_ds.to_netcdf(output_netcdf)

print("Merge completed successfully.")

Merge completed successfully.
