In [1]:
import os
import sys
import rasterio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shutil
from pathlib import Path
import geopandas as gpd
from rasterio.warp import reproject, Resampling

import rasterio.mask
from rasterio.mask import mask

In [2]:
# Start the search from the notebook's working directory
current_dir = os.path.abspath('')

# Walk up the directory tree until a folder containing 'constants.py' is found.
# os.path.dirname() returns its argument unchanged once the filesystem root is
# reached, so hitting that fixed point means no ancestor holds the file: fail
# with a clear error instead of looping forever.
project_root = current_dir
while not os.path.isfile(os.path.join(project_root, 'constants.py')):
    parent_dir = os.path.dirname(project_root)
    if parent_dir == project_root:
        raise FileNotFoundError(
            f"Could not find 'constants.py' in '{current_dir}' or any of its parent directories"
        )
    project_root = parent_dir

# Add the project root to the Python path (only once, so re-running this cell
# does not pile up duplicate entries)
if project_root not in sys.path:
    sys.path.append(project_root)


In [3]:
from constants import MODIFIED_RASTER, GRUPO_RASTER, DATA_PATH, CROPPED_HANSEN_TEN_MASK, HANSEN_TREECOVER_FILEPATH, SOIL, DISSOLVED_CLEAN_YEAR, TREECOVER_10_AND_ABOVE, SOIL


In [4]:
# Default destination folder for generated files (subfolders may be appended if desired)
output_dir = os.path.join(DATA_PATH, 'ml_data')
Path(output_dir).mkdir(parents=True, exist_ok=True)

In [14]:
# Cropped, resampled soil raster. Built from DATA_PATH (the same folder the
# cropping cell writes to) instead of a user-specific absolute Windows path.
cropped_resampled_soil = os.path.join(DATA_PATH, 'processed_rasters', 'cropped_resampled_soil.tif')

# Crop Hansen Treecover 10 and above to within property limits

In [6]:
# Load the property geometries and express them in EPSG:4326
dissolved_clean_years = gpd.read_file(DISSOLVED_CLEAN_YEAR)
dissolved_clean_years = dissolved_clean_years.to_crs('EPSG:4326')

# Where files will save, can add subfolders if desired
output_dir = os.path.join(DATA_PATH, 'processed_rasters')
os.makedirs(output_dir, exist_ok=True)

# Ensure CRS match between shapefile and raster. The raster is opened in a
# context manager so its file handle is released as soon as the CRS is read.
with rasterio.open(TREECOVER_10_AND_ABOVE) as treecover_src:
    treecover_crs = treecover_src.crs
shapes = dissolved_clean_years.to_crs(crs=treecover_crs)


In [33]:
# Load the raster
with rasterio.open(TREECOVER_10_AND_ABOVE) as src:
    # Mask everything outside the property geometries. crop=False keeps the
    # full raster extent, so only pixel values change, not the grid.
    # The fill value is passed explicitly so masked pixels really hold the -1
    # declared as nodata below; otherwise mask() fills with the source raster's
    # own nodata value (or 0 when it has none).
    out_image, out_transform = mask(src, shapes.geometry, crop=False, nodata=-1)
    out_meta = src.meta.copy()

    # Update the metadata to reflect the output shape (height, width), transform, and nodata value
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform,
                     "nodata": -1})

    # Save the masked raster
    output_raster_path = os.path.join(output_dir, "cropped_TREECOVER_10_AND_ABOVE.tif")
    with rasterio.open(output_raster_path, "w", **out_meta) as dest:
        dest.write(out_image)

In [9]:
# Path of the masked treecover raster in 'processed_rasters'; later cells open it as the reference/mask raster
raster_to_use_as_mask =  os.path.join(DATA_PATH,'processed_rasters',  "cropped_TREECOVER_10_AND_ABOVE.tif")

In [8]:
def crop_raster_with_shapefile(input_raster_path, shapefile, output_dir, output_filename="cropped_raster.tif"):
    """
    Crops an input raster to the geometries of a shapefile and saves the result as a GeoTIFF.

    Pixels outside the geometries are filled with 0, which is also the nodata
    value recorded in the output file's metadata.

    Parameters:
    - input_raster_path: Path to the input raster file.
    - shapefile: A GeoDataFrame or similar object exposing a `.geometry` attribute.
      When it also carries a CRS (`.crs`) and offers `.to_crs()`, the geometries
      are reprojected to the raster's CRS before cropping.
    - output_dir: Directory where the cropped raster will be saved (created if missing).
    - output_filename: Name for the output cropped raster file. Default is 'cropped_raster.tif'.
    """
    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)
    output_raster_path = os.path.join(output_dir, output_filename)

    with rasterio.open(input_raster_path) as src:
        # mask() compares coordinates as given, so the geometries must be
        # expressed in the raster's CRS or they will not line up with it.
        if (
            src.crs is not None
            and getattr(shapefile, "crs", None) is not None
            and hasattr(shapefile, "to_crs")
        ):
            shapefile = shapefile.to_crs(crs=src.crs)

        # Fill value passed explicitly so it matches the nodata written below;
        # by default mask() would fill with the source raster's own nodata.
        out_image, out_transform = mask(src, shapefile.geometry, crop=True, nodata=0)
        out_meta = src.meta.copy()

        # Update the metadata to reflect the new shape, transform, and nodata value
        out_meta.update({
            "driver": "GTiff",
            "height": out_image.shape[1],
            "width": out_image.shape[2],
            "transform": out_transform,
            "nodata": 0
        })

        # Save the cropped raster
        with rasterio.open(output_raster_path, "w", **out_meta) as dest:
            dest.write(out_image)

In [5]:

def crop_and_mask_raster(src_raster_path, property_raster_path, output_raster_path, max_height, max_width):
    """
    Masks a source raster with a property-limits raster and writes the result as int16.

    Band 1 of both rasters is zero-padded on the bottom and right edges up to
    (max_height, max_width). Cells where the property raster holds its nodata
    value (the one declared in its metadata, or -1 when none is declared), and
    cells the property raster does not cover at all (its padding), are set to
    the source raster's nodata value (or -1 when none is declared).

    Parameters:
    - src_raster_path: Path to the raster whose values are kept inside the property limits.
    - property_raster_path: Path to the raster marking the property limits.
    - output_raster_path: Path where the masked raster is written.
    - max_height: Number of rows of the output; must be >= the height of both inputs.
    - max_width: Number of columns of the output; must be >= the width of both inputs.

    Raises:
    - ValueError: If either input raster is larger than (max_height, max_width).

    Note: the two rasters are combined cell by cell using array indices and the
    output reuses the source raster's profile, so both inputs are expected to
    share the same grid. Values are cast to int16 on write, which drops any
    fractional part.
    """
    with rasterio.open(src_raster_path) as src, rasterio.open(property_raster_path) as property_raster:
        # Read the first band of the source raster and of the property limits raster
        src_data = src.read(1)
        property_data = property_raster.read(1)
        property_height, property_width = property_data.shape

        # np.pad can only grow an array; report oversize inputs explicitly
        for label, band in (("Source", src_data), ("Property", property_data)):
            if band.shape[0] > max_height or band.shape[1] > max_width:
                raise ValueError(
                    f"{label} raster shape {band.shape} exceeds the requested "
                    f"maximum ({max_height}, {max_width})"
                )

        # Pad both the source data and property data to the maximum dimensions
        src_data = np.pad(src_data, ((0, max_height - src_data.shape[0]), (0, max_width - src_data.shape[1])), constant_values=0)
        property_data = np.pad(property_data, ((0, max_height - property_data.shape[0]), (0, max_width - property_data.shape[1])), constant_values=0)

        # Cells are "inside" unless the property raster marks them as nodata
        property_nodata = property_raster.nodata if property_raster.nodata is not None else -1
        mask_data = (property_data != property_nodata)
        # The zero padding is not real property data: treat it as outside
        mask_data[property_height:, :] = False
        mask_data[:, property_width:] = False

        # Check if the nodata value is set for the source raster, otherwise use a default value
        nodata_value = src.nodata if src.nodata is not None else -1

        # Apply the mask to the source raster
        src_data_masked = np.where(mask_data, src_data, nodata_value)
        print(f"Source data shape:{src_data.shape} Property data shape:{property_data.shape} Mask data shape:, {mask_data.shape} After masking: Source data masked shape:, {src_data_masked.shape}")

        # Save the masked raster on the padded grid, reusing the source georeferencing
        profile = src.profile
        profile['height'] = max_height
        profile['width'] = max_width
        profile['nodata'] = nodata_value
        profile['dtype'] = 'int16'
        with rasterio.open(output_raster_path, 'w', **profile) as dst:
            dst.write(src_data_masked.astype(profile['dtype']), 1)




In [10]:

def apply_mask_to_raster(mask_raster_path, target_raster_path, output_raster_path):
    """
    Writes a copy of a raster in which every cell where the mask raster is zero is blanked out.

    Parameters:
    - mask_raster_path: Path to the raster to use as a mask (only its first band is read).
    - target_raster_path: Path to the raster to apply the mask to (all bands are processed).
    - output_raster_path: Path where the masked raster will be saved.

    Blanked cells receive the target raster's nodata value, or -1 when the
    target declares none; that value is also recorded in the output metadata.
    """
    with rasterio.open(mask_raster_path) as mask_src:
        outside = mask_src.read(1) == 0

    with rasterio.open(target_raster_path) as target_src:
        bands = target_src.read()
        out_meta = target_src.meta.copy()
        fill_value = -1 if target_src.nodata is None else target_src.nodata

        # A single boolean index over the two spatial axes blanks every band at once
        bands[:, outside] = fill_value

        out_meta.update(nodata=fill_value)

        with rasterio.open(output_raster_path, 'w', **out_meta) as out_dst:
            out_dst.write(bands)



# Soil

In [31]:

# Point output_dir at the 'ml_data' folder and define where the masked soil raster is written
output_dir = os.path.join(DATA_PATH, 'ml_data')
soil_masked_output_raster = os.path.join(output_dir,'soil_masked.tif')

In [66]:
# Open the reference raster
with rasterio.open(raster_to_use_as_mask) as ref_raster:
    ref_transform = ref_raster.transform
    ref_crs = ref_raster.crs
    # Target grid size is hard-coded.
    # NOTE(review): presumably the dimensions of the reference raster - confirm.
    ref_width = 20381
    ref_height = 22512

    # Open the raster to be resampled
    with rasterio.open(SOIL) as soil_raster:
        soil_data = soil_raster.read(1)

        # Allocate the destination in the soil raster's own dtype. The output
        # file below is declared with that dtype, so the array written to it
        # must match; nearest-neighbour resampling only copies source values,
        # so a float64 buffer (8 bytes per cell on this grid) adds nothing.
        resampled_data = np.empty(shape=(ref_height, ref_width), dtype=soil_data.dtype)

        # Resample the soil data onto the reference grid
        reproject(
            source=soil_data,
            destination=resampled_data,
            src_transform=soil_raster.transform,
            src_crs=soil_raster.crs,
            dst_transform=ref_transform,
            dst_crs=ref_crs,
            resampling=Resampling.nearest)

        # Write the resampled data to a new raster file (relative to the working directory)
        with rasterio.open('resampled_soil.tif', 'w', driver='GTiff',
                           height=ref_height, width=ref_width,
                           count=1, dtype=str(soil_data.dtype),
                           nodata= 0,
                           crs=ref_crs, transform=ref_transform) as dst:
            dst.write(resampled_data, 1)



In [20]:
# Resampled soil raster, located relative to DATA_PATH rather than through a
# user-specific absolute Windows path.
resampled_soil = os.path.join(DATA_PATH, 'processed_rasters', 'resampled_soil.tif')

In [34]:
# Mask the resampled soil raster with the property-limits raster on a 22512 x 20381 grid
crop_and_mask_raster(
    resampled_soil,
    raster_to_use_as_mask,
    soil_masked_output_raster,
    max_height=22512,
    max_width=20381,
)

Source data shape:(22512, 20381) Property data shape:(22512, 20381) Mask data shape:, (22512, 20381) After masking: Source data masked shape:, (22512, 20381)


In [28]:
# Display the current value of output_raster_path (inspection only; nothing is computed here)
output_raster_path

'C:\\Users\\bsf31\\Documents\\post-meds\\data\\policy-data\\ml_data\\cities_masked.tif'

In [15]:
# Send the following outputs to the 'processed_rasters' folder, creating it when absent
output_dir = os.path.join(DATA_PATH, 'processed_rasters')
Path(output_dir).mkdir(parents=True, exist_ok=True)

In [91]:
# Crop the resampled soil raster (read from the working directory) to the property geometries
crop_raster_with_shapefile(
    'resampled_soil.tif',
    dissolved_clean_years,
    output_dir,
    output_filename='cropped_resampled_soil.tif',
)

In [94]:
# Open the original raster to read data
with rasterio.open(cropped_resampled_soil) as raster:
    data = raster.read(1)  # Read the first band
    meta = raster.meta.copy()  # Copy the metadata

# Recode 0 to -1 (the chosen nodata value). The zero cells are located on the
# values as stored, and the array is converted to int16 *before* -1 is written
# into it: an unsigned source dtype cannot hold -1, so assigning first would
# fail or wrap around.
zero_cells = (data == 0)
data = data.astype('int16')
data[zero_cells] = -1

# Update the metadata for the new file
meta.update({
    'nodata': -1,  # Set the new nodata value
    'dtype': 'int16'  # Ensure the data type is int16
})

# Create a new raster file with the modified data and updated metadata
new_raster_path =  os.path.join(DATA_PATH, 'processed_rasters', 'cropped_resampled_soil_nodata.tif')
with rasterio.open(new_raster_path, 'w', **meta) as new_raster:
    new_raster.write(data, 1)  # Write the modified data as the first band


In [99]:
# Read the recoded soil raster together with its georeferencing
with rasterio.open(new_raster_path) as src:
    src_profile = src.profile
    src_crs = src.crs
    src_transform = src.transform
    src_data = src.read(1)

with rasterio.open(raster_to_use_as_mask) as mask_raster:
    mask_crs = mask_raster.crs
    mask_transform = mask_raster.transform
    mask_data = mask_raster.read(1)

    # The mask can be used as-is only when CRS, transform and shape all agree
    same_grid = (
        src_crs == mask_crs
        and src_transform == mask_transform
        and src_data.shape == mask_data.shape
    )
    if same_grid:
        aligned_mask_data = mask_data
    else:
        # Otherwise resample the mask onto the soil raster's grid
        aligned_mask_data = np.empty(src_data.shape, dtype=mask_raster.dtypes[0])
        reproject(
            source=mask_data,
            destination=aligned_mask_data,
            src_transform=mask_transform,
            src_crs=mask_crs,
            dst_transform=src_transform,
            dst_crs=src_crs,
            resampling=Resampling.nearest
        )

# Wherever the mask holds -1 (assumed to be its nodata value), force the soil value to -1 too
nodata_mask = (aligned_mask_data == -1)
masked_src_data = np.where(nodata_mask, -1, src_data)

# Record -1 as nodata and write the masked band to the soil output path
src_profile.update(nodata=-1)
with rasterio.open(soil_masked_output_raster, 'w', **src_profile) as dst:
    dst.write(masked_src_data, 1)

# Precipitation

In [109]:
# Average precipitation raster (2001-2022 per the file name), located relative
# to DATA_PATH rather than through a user-specific absolute Windows path.
precipitation_raw = os.path.join(DATA_PATH, 'precipitation_raw', 'average_precipitation_2001_2022.tif')

In [122]:
# The masked precipitation raster is written to the 'ml_data' folder (created when absent)
output_dir = os.path.join(DATA_PATH, 'ml_data')
Path(output_dir).mkdir(parents=True, exist_ok=True)
output_raster_path = os.path.join(output_dir, "precipitation_masked.tif")

In [123]:
# Mask the precipitation raster with the property-limits raster on a 22512 x 20381 grid
crop_and_mask_raster(
    precipitation_raw,
    raster_to_use_as_mask,
    output_raster_path,
    max_height=22512,
    max_width=20381,
)

Source data shape:(22512, 20381) Property data shape:(22512, 20381) Mask data shape:, (22512, 20381) After masking: Source data masked shape:, (22512, 20381)


# Land Use Plan 'GRUPO'

In [6]:
# Rasterised land-use-plan 'GRUPO' layer, located relative to DATA_PATH rather
# than through a user-specific absolute Windows path.
grupo = os.path.join(DATA_PATH, 'processed_rasters', 'grupo', 'output_grupo_raster.tif')

In [7]:
# The masked GRUPO raster is written to the 'ml_data' folder (created when absent)
output_dir = os.path.join(DATA_PATH, 'ml_data')
Path(output_dir).mkdir(parents=True, exist_ok=True)
output_raster_path = os.path.join(output_dir, "grupo_masked.tif")

In [10]:
# Mask the GRUPO raster with the property-limits raster on a 22512 x 20381 grid
crop_and_mask_raster(
    grupo,
    raster_to_use_as_mask,
    output_raster_path,
    max_height=22512,
    max_width=20381,
)

Source data shape:(22512, 20381) Property data shape:(22512, 20381) Mask data shape:, (22512, 20381) After masking: Source data masked shape:, (22512, 20381)


# Travel Time to Cities and Ports

In [1]:
# Travel-time-to-cities raster, located relative to DATA_PATH rather than
# through a user-specific absolute Windows path.
cities = os.path.join(DATA_PATH, 'raw_nelson-20240308T085020Z-001', 'raw_nelson', 'travel_time_to_cities_12.tif')

In [11]:
# The masked cities raster is written to the 'ml_data' folder (created when absent)
output_dir = os.path.join(DATA_PATH, 'ml_data')
Path(output_dir).mkdir(parents=True, exist_ok=True)
output_raster_path = os.path.join(output_dir, "cities_masked.tif")

In [17]:
# Resample the travel-time-to-cities raster onto the reference raster's grid
with rasterio.open(raster_to_use_as_mask) as ref_raster:
    ref_transform, ref_crs = ref_raster.transform, ref_raster.crs
    ref_width, ref_height = 20381, 22512  # hard-coded size of the target grid

    with rasterio.open(cities) as cities_src:
        cities_data = cities_src.read(1)

        # Start from the source profile and override everything that describes
        # the grid, plus the nodata value and dtype of the file to be written
        cities_profile = cities_src.profile
        cities_profile.update(
            crs=ref_crs,
            transform=ref_transform,
            width=ref_width,
            height=ref_height,
            nodata=-1,
            dtype='int16',
        )

        # Destination buffer that reproject() fills in
        resampled_data = np.empty((ref_height, ref_width))

        # Nearest-neighbour resampling from the cities grid to the reference grid
        reproject(
            source=cities_data,
            destination=resampled_data,
            src_transform=cities_src.transform,
            src_crs=cities_src.crs,
            dst_transform=ref_transform,
            dst_crs=ref_crs,
            resampling=Resampling.nearest)

        # Save the result as int16 in the working directory
        with rasterio.open('resampled_cities.tif', 'w', **cities_profile) as dst:
            dst.write(resampled_data.astype('int16'), 1)

# Where the masked version of the cities raster will be written
cities_masked_output_raster = os.path.join(output_dir,'cities_masked.tif')



Source data shape:(22512, 20381) Property data shape:(22512, 20381) Mask data shape:, (22512, 20381) After masking: Source data masked shape:, (22512, 20381)


In [30]:
# Mask the resampled cities raster with the property-limits raster on a 22512 x 20381 grid
crop_and_mask_raster(
    'resampled_cities.tif',
    raster_to_use_as_mask,
    cities_masked_output_raster,
    max_height=22512,
    max_width=20381,
)

Source data shape:(22512, 20381) Property data shape:(22512, 20381) Mask data shape:, (22512, 20381) After masking: Source data masked shape:, (22512, 20381)


In [18]:
# Travel-time-to-ports raster, located relative to DATA_PATH rather than
# through a user-specific absolute Windows path.
ports = os.path.join(DATA_PATH, 'raw_nelson-20240308T085020Z-001', 'raw_nelson', 'travel_time_to_ports_5.tif')

In [19]:
# Resample the travel-time-to-ports raster onto the reference raster's grid
with rasterio.open(raster_to_use_as_mask) as ref_raster:
    ref_transform, ref_crs = ref_raster.transform, ref_raster.crs
    ref_width, ref_height = 20381, 22512  # hard-coded size of the target grid

    with rasterio.open(ports) as ports_src:
        ports_data = ports_src.read(1)

        # Start from the source profile and override everything that describes
        # the grid, plus the nodata value and dtype of the file to be written
        ports_profile = ports_src.profile
        ports_profile.update(
            crs=ref_crs,
            transform=ref_transform,
            width=ref_width,
            height=ref_height,
            nodata=-1,
            dtype='int16',
        )

        # Destination buffer that reproject() fills in
        resampled_data = np.empty((ref_height, ref_width))

        # Nearest-neighbour resampling from the ports grid to the reference grid
        reproject(
            source=ports_data,
            destination=resampled_data,
            src_transform=ports_src.transform,
            src_crs=ports_src.crs,
            dst_transform=ref_transform,
            dst_crs=ref_crs,
            resampling=Resampling.nearest)

        # Save the result as int16 in the working directory
        with rasterio.open('resampled_ports.tif', 'w', **ports_profile) as dst:
            dst.write(resampled_data.astype('int16'), 1)

# Where the masked version of the ports raster will be written
ports_masked_output_raster = os.path.join(output_dir,'ports_masked.tif')

# Mask the resampled ports raster with the property-limits raster on a 22512 x 20381 grid
crop_and_mask_raster(
    'resampled_ports.tif',
    raster_to_use_as_mask,
    ports_masked_output_raster,
    max_height=22512,
    max_width=20381,
)

Source data shape:(22512, 20381) Property data shape:(22512, 20381) Mask data shape:, (22512, 20381) After masking: Source data masked shape:, (22512, 20381)
