In [None]:
%load_ext autoreload
%autoreload 2


## Libraries

In [None]:
# Imports
import os
import multiprocessing as mp
import pkg_resources
import pathlib

import numpy as np
import pandas as pd
from tabulate import tabulate

import riskmapjnr as rmj


In [None]:
# GDAL
# Set the GDAL_CACHEMAX configuration option through the process environment.
# NOTE(review): GDAL presumably interprets a small value like this as
# megabytes -- confirm against the GDAL configuration options documentation.
os.environ["GDAL_CACHEMAX"] = "1024"


## Set user parameters

In [None]:
# Sub-folder name of the project under the downloads folder.
iso_code = "MTQ"
# Years and forest data source; both are used further down as filename filter
# words when selecting the forest cover change raster.
years = [2000, 2010, 2023]
forest_source = "tmf"


In [None]:
# NOTE(review): models_to_compare is not read anywhere in the visible part of
# this notebook -- confirm whether it is still needed.
models_to_compare = ["rmj_bm", "rmj_mw", "rf", "icar", "glm", "user"]
# Words used to select the model folder inside the project folder.
model_to_allocate = ["rmj_bm"]


In [None]:
# Passed to allocate_deforestation() as defor_juris_ha (expected deforestation
# at the jurisdictional level, in hectares).
jurisdiction_expected_total_deforestation_ha = 1000
# Passed to allocate_deforestation() as years_forecast (length of the
# forecasting period, in years).
years_to_forecast = 4


In [None]:
# Filename filter words used to pick the prediction raster and the
# deforestation rate table inside the model folder.
periods = ["forecast"]


## Connect folders

In [None]:
# All paths are built relative to the current working directory.
root_folder = pathlib.Path.cwd()
downloads_folder = root_folder / "downloads"


In [None]:
# One project folder per iso_code; rasters are looked up in its "data" sub-folder.
project_folder = downloads_folder / iso_code
processed_data_folder = project_folder / "data"


In [None]:
# Folder receiving every output of the allocation step.
allocation_folder = project_folder / "allocation"
# Create the folder (and any missing parents) in one idempotent call instead
# of testing for existence first, which avoids the check-then-create race.
allocation_folder.mkdir(parents=True, exist_ok=True)


## Select predictions files

In [None]:
def list_folders(directory):
    """
    Collect the immediate sub-directories of a directory.

    Parameters:
        directory (str or Path): The directory whose sub-directories are listed.

    Returns:
        list: One pathlib.Path object per sub-directory found directly inside
              `directory` (not bare names). On any error an empty list is
              returned and a message is printed.
    """
    subfolders = []
    try:
        # Path construction stays inside the try so that invalid input is
        # reported through the generic handler below.
        for child in pathlib.Path(directory).iterdir():
            if child.is_dir():
                subfolders.append(child)
    except FileNotFoundError:
        print(f"The directory {directory} does not exist.")
        return []
    except Exception as e:
        print(f"An error occurred: {e}")
        return []
    return subfolders


In [None]:
def filter_folders(input_folders, filter_words, exclude_words=None):
    """
    Select folders by words found in their names (case-insensitive).
    Parameters:
        input_folders (list): Folder paths to filter; each item must expose a `.name` attribute.
        filter_words (list): A folder is kept only if its name contains at least one of these words.
        exclude_words (list, optional): A folder is dropped if its name contains any of these words. Defaults to None.
    Returns:
        list: The folders that passed both tests, in their original order.
    """
    wanted = [word.lower() for word in filter_words]
    banned = [word.lower() for word in (exclude_words or [])]

    def _name_has_any(folder, words):
        # True when at least one word is a substring of the lowercased name.
        return any(word in folder.name.lower() for word in words)

    kept = []
    for folder in input_folders:
        if not _name_has_any(folder, wanted):
            continue
        if _name_has_any(folder, banned):
            continue
        kept.append(folder)
    return kept


In [None]:
def list_files_by_extension(folder_path, file_extensions, recursive=False):
    """
    Find files in a folder whose extension matches one of the given extensions.
    Parameters:
    folder_path (str or Path): Folder to search.
    file_extensions (list of str): Extensions to match, compared case-insensitively (e.g., ['.shp', '.tif']).
    recursive (bool): If True, sub-directories are searched as well.
    Returns:
    list: Matching file paths as strings. If the path is not a directory or an
    error occurs, a message is printed and whatever was collected so far is returned.
    """
    found = []
    try:
        root = pathlib.Path(folder_path)

        # Guard clause: nothing to search when the path is not a directory
        if not root.is_dir():
            print(f"The provided path '{root}' is not a directory.")
            return found

        for child in root.iterdir():
            if child.is_file():
                suffix = child.suffix.lower()
                if any(suffix == ext.lower() for ext in file_extensions):
                    found.append(str(child))
                    continue
            if recursive and child.is_dir():
                # Descend into the sub-directory and merge its matches
                found += list_files_by_extension(child, file_extensions, recursive)
    except Exception as e:
        print(f"An error occurred: {e}")
    return found


In [None]:
def filter_files(input_files, filter_words, exclude_words=None, include_all=True):
    """
    Filters a list of files based on include and exclude words.
    Matching is case-insensitive and looks at the file name only, not at the
    parent folders.
    Parameters:
        input_files (list): List of file paths to be filtered.
        filter_words (list): Words that must be present in the filenames for inclusion.
        exclude_words (list, optional): Words that must not be present in the filenames. Defaults to None.
        include_all (bool, optional): If True, all filter words must be present in the filename. If False, at least one of the filter words must be present. Defaults to True.
    Returns:
        list: Filtered list of files, in their original order.
    """
    # Ensure all words are lowercase for case-insensitive comparison
    filter_words = [word.lower() for word in filter_words]
    exclude_words = [word.lower() for word in (exclude_words or [])]

    # The two modes differ only in how the include words are combined
    combine = all if include_all else any

    def _is_kept(file):
        name = pathlib.Path(file).name.lower()
        if not combine(word in name for word in filter_words):
            return False
        return not any(word in name for word in exclude_words)

    return [file for file in input_files if _is_kept(file)]


In [None]:
def filter_out_ipynb_checkpoints(input_files):
    """
    Drop every path that has a '.ipynb_checkpoints' component.
    Parameters:
        input_files (list): Paths (str or Path) to be filtered.
    Returns:
        list: The paths with no component exactly equal to '.ipynb_checkpoints',
              in their original order.
    """
    checkpoint_dir = ".ipynb_checkpoints"
    kept = []
    for candidate in input_files:
        # Compare whole path components, not substrings
        if checkpoint_dir in pathlib.Path(candidate).parts:
            continue
        kept.append(candidate)
    return kept


In [None]:
# Look for model folders directly inside the project folder.
directory_path = project_folder
folders = list_folders(directory_path)
# Keep folders whose name contains one of the model_to_allocate words and
# none of the data folder words.
available_models = filter_folders(folders, model_to_allocate, ["data", "data_raw"])
available_models = filter_out_ipynb_checkpoints(available_models)
print("Models_available:", available_models)


In [None]:
# Fail with an explicit message rather than a bare IndexError when no model
# folder was found.
if not available_models:
    raise FileNotFoundError(
        f"No model folder matching {model_to_allocate} found in {project_folder}"
    )
# The first matching model folder is used.
model_folder = available_models[0]
tif_files = list_files_by_extension(model_folder, [".tif"], True)
# The search is recursive, so drop Jupyter checkpoint copies, as is done for
# the CSV files of the deforestation rate table.
tif_files = filter_out_ipynb_checkpoints(tif_files)
# Keep rasters whose file name contains at least one of the period words.
available_prediction_files = filter_files(tif_files, periods, None, False)
if not available_prediction_files:
    raise FileNotFoundError(
        f"No prediction raster matching {periods} found in {model_folder}"
    )
available_prediction_file = available_prediction_files[0]
available_prediction_file


In [None]:
# The first matching model folder is used (raises IndexError if none was found).
model_folder = available_models[0]
csv_files = list_files_by_extension(model_folder, [".csv"], True)
# include_all=True: the file name must contain every period word AND "defrate".
model_files = filter_files(csv_files, periods + ["defrate"], None, True)
available_defrate_files = filter_out_ipynb_checkpoints(model_files)
# The first match is used (raises IndexError if nothing matched).
available_defrate_file = available_defrate_files[0]
available_defrate_file


## Select forest cover change file

In [None]:
# List all raster files in the processed data folder
# Non-recursive search: only files directly inside processed_data_folder are listed.
input_raster_files = list_files_by_extension(processed_data_folder, [".tiff", ".tif"])


In [None]:
# filter_files() defaults to include_all=True, so the file name must contain
# "forest", "loss", the forest source and every year, and must contain neither
# "distance" nor "edge". The first match is used (IndexError if nothing matches).
forest_change_file = filter_files(
    input_raster_files,
    ["forest", "loss", forest_source] + [str(num) for num in years],
    ["distance", "edge"],
)[0]


## Allocate deforestation to project

In [None]:
import fiona
from fiona.transform import transform_geom


def reproject_shapefile(input_shapefile, output_shapefile, target_epsg):
    """
    Reproject a vector file to the CRS identified by an EPSG code.

    Parameters:
        input_shapefile (str or Path): Vector file to read.
        output_shapefile (str or Path): ESRI Shapefile to write when a
            reprojection is needed.
        target_epsg (int or str): EPSG code of the target CRS (e.g. 32620).

    Returns:
        The path of the file to use afterwards: `output_shapefile` if a
        reprojection was performed, `input_shapefile` if the source already
        uses the target CRS (nothing is written in that case).
    """
    # Open the input shapefile
    with fiona.open(input_shapefile, "r") as src:
        src_crs = src.crs
        target_crs = f"EPSG:{target_epsg}"

        # NOTE(review): this relies on fiona's CRS object comparing equal to an
        # "EPSG:<code>" string -- confirm for the installed Fiona version.
        if src_crs != target_crs:
            # The output CRS must be the requested target, not a fixed code,
            # otherwise the written geometries and the declared CRS disagree.
            with fiona.open(
                output_shapefile,
                "w",
                crs=target_crs,
                driver="ESRI Shapefile",
                schema=src.schema,
            ) as dst:
                for feature in src:
                    # Copy the feature, replacing only its geometry
                    geometry = transform_geom(
                        src_crs, target_crs, feature["geometry"]
                    )
                    dst.write({**feature, "geometry": geometry})
            return output_shapefile
        else:
            print("Source and target CRS are the same. No reprojection needed.")
            return input_shapefile


In [None]:
# Vector file with the project borders, expected in the allocation folder.
project_borders = allocation_folder / "project_mtq.shp"


In [None]:
# Target EPSG code for reproject_shapefile(); only referenced by the
# commented-out reprojection cell in the visible part of this notebook.
epsg_code = 32620


In [None]:
# base_name = os.path.splitext(os.path.basename(project_borders))[0]
# reprojected_filename = os.path.join(allocation_folder, f"{base_name}_reprojected.shp")
# project_borders_repro = reproject_shapefile(project_borders,reprojected_filename,epsg_code)


In [None]:
# Borders file that is passed to the allocation step.
# NOTE(review): presumably a pre-existing reprojected copy of project_borders;
# the commented-out reprojection cell would write "<base_name>_reprojected.shp",
# which is a different file name -- confirm which file is intended.
project_borders_repro = allocation_folder / "project_mtq_repro.shp"


In [None]:
import os

import numpy as np
from osgeo import gdal
import pandas as pd
import forestatrisk

# Local application imports
from forestatrisk.misc import progress_bar, makeblock

# Short aliases for the os.path helpers used in allocate_deforestation().
opj = os.path.join
opd = os.path.dirname


def allocate_deforestation(
    riskmap_juris_file,
    defor_rate_tab,
    defor_juris_ha,
    years_forecast,
    project_borders,
    output_file="defor_project.csv",
    defor_density_map=False,
    blk_rows=128,
    verbose=False,
):
    """Allocating deforestation.

    Besides ``output_file``, two intermediate files are written in the
    same directory: ``project_riskmap.tif`` (risk map cropped to the
    project borders) and ``defrate_cat_forecast.csv`` (rate table with
    the added ``rate_abs`` and ``defor_dens`` columns). If
    ``defor_density_map`` is True, ``deforestation_density_map.tif`` is
    written there as well.

    :param riskmap_juris_file: Raster file with classes of deforestation
      risk at the jurisdictional level. Class 0 is treated as no data.

    :param defor_rate_tab: CSV file including the table with
      deforestation rates for each deforestation class. The columns
      ``cat``, ``nfor``, ``rate_mod`` and ``pixel_area`` are read.

    :param defor_juris_ha: Expected deforestation at the
      jurisdictional level (in hectares).

    :param years_forecast: Length of the forecasting period (in years).

    :param project_borders: Vector file for project borders.

    :param output_file: Output file with deforestation
      allocated to the project.

    :param defor_density_map: Compute the deforestation density map
      for the jurisdiction. Deforestation density is provided in
      ha/pixel/year (hectares of deforestation per pixel per year).
      Deforestation densities are floating-point numbers. For large
      jurisdictions (e.g. country scale) and high resolutions (e.g. 30
      m), this will produce a large raster file which will occupy
      a large amount of space on disk (e.g. several gigabytes).

    :param blk_rows: If > 0, number of rows for block (else 256x256).

    :param verbose: If True, print messages.

    :return: None. Results are written to disk.

    """

    # Callback
    cback = gdal.TermProgress_nocb if verbose else 0

    # Creation options
    copts = ["COMPRESS=DEFLATE", "BIGTIFF=YES"]

    # ---------------------------------------
    # Crop riskmap to project boundaries
    # ---------------------------------------

    out_dir = opd(output_file)
    ofile = opj(out_dir, "project_riskmap.tif")
    gdal.Warp(
        ofile,
        riskmap_juris_file,
        cropToCutline=True,
        warpOptions=["CUTLINE_ALL_TOUCHED=TRUE"],
        cutlineDSName=project_borders,
        creationOptions=copts,
        callback=cback,
    )

    # ---------------------------------------
    # Compute number of pixels for each class
    # ---------------------------------------

    # One histogram bucket per class value 1..65535 (buckets are one unit
    # wide and start at 0.5, so class 0 is not counted).
    nvalues = 65535
    with gdal.Open(ofile) as ds:
        band = ds.GetRasterBand(1)
        counts = band.GetHistogram(0.5, 65535.5, nvalues, 0, 0)
    data = {"cat": [i + 1 for i in range(nvalues)], "counts": counts}
    df_count = pd.DataFrame(data)

    # Upload deforestation rates
    df_rate = pd.read_csv(defor_rate_tab)

    # -----------------------------
    # Compute deforestation density
    # -----------------------------

    # Pixel area
    pixel_area = df_rate.loc[0, "pixel_area"]

    # Correction factor, either ndefor / sum_i p_i
    # or theta * nfor / sum_i p_i
    sum_pi = (df_rate["nfor"] * df_rate["rate_mod"]).sum()
    correction_factor = defor_juris_ha / (pixel_area * sum_pi)

    # Absolute deforestation rate
    df_rate["rate_abs"] = df_rate["rate_mod"] * correction_factor

    # Deforestation density (ha/pixel/yr)
    df_rate["defor_dens"] = df_rate["rate_abs"] * pixel_area / years_forecast

    # Save the df_rate table
    ofile = opj(out_dir, "defrate_cat_forecast.csv")
    df_rate.to_csv(ofile)

    # -----------------------------
    # Join tables
    # -----------------------------

    df_project = df_count.merge(right=df_rate, on="cat", how="left")

    # Annual deforestation (ha) for project
    defor_project = (df_project["counts"] * df_project["defor_dens"]).sum()

    # Save results
    data = {
        "period": ["annual", "entire"],
        "length (yr)": [1, years_forecast],
        "deforestation (ha)": [
            round(defor_project, 1),
            round(defor_project * years_forecast, 1),
        ],
    }
    res = pd.DataFrame(data)
    res.to_csv(output_file, header=True, index=False)

    # -----------------------------
    # Get deforestation density map
    # -----------------------------

    if defor_density_map:
        riskmap_r = gdal.Open(riskmap_juris_file)
        riskmap_b = riskmap_r.GetRasterBand(1)
        gt = riskmap_r.GetGeoTransform()
        proj = riskmap_r.GetProjection()
        ncol = riskmap_r.RasterXSize
        nrow = riskmap_r.RasterYSize

        ddm_file = opj(out_dir, "deforestation_density_map.tif")
        driver = gdal.GetDriverByName("GTiff")
        if os.path.isfile(ddm_file):
            os.remove(ddm_file)
        ddm_r = driver.Create(
            ddm_file,
            ncol,
            nrow,
            1,
            gdal.GDT_Float64,
            ["COMPRESS=DEFLATE", "BIGTIFF=YES"],
        )
        ddm_r.SetGeoTransform(gt)
        ddm_r.SetProjection(proj)
        ddm_b = ddm_r.GetRasterBand(1)
        ddm_b.SetNoDataValue(-9999.0)

        # Make blocks
        blockinfo = makeblock(riskmap_juris_file, blk_rows=blk_rows)
        nblock = blockinfo[0]
        nblock_x = blockinfo[1]
        x = blockinfo[3]
        y = blockinfo[4]
        nx = blockinfo[5]
        ny = blockinfo[6]
        if verbose:
            print(f"Divide region in {nblock} blocks")

        # Lookup table class -> deforestation density, built once for all
        # blocks instead of re-indexing df_rate on every iteration.
        dens_by_cat = df_rate.set_index("cat")["defor_dens"]

        # Write raster of dd
        if verbose:
            print("Write deforestation density raster")
        # Loop on blocks of data
        for b in range(nblock):
            # Progress bar
            progress_bar(nblock, b + 1)
            # Position in 1D-arrays
            px = b % nblock_x
            py = b // nblock_x
            # Data for one block
            risk_data = riskmap_b.ReadAsArray(x[px], y[py], nx[px], ny[py])
            risk_data = risk_data.flatten(order="C")
            # Get defor density from risk class (class 0 is no data)
            has_class = risk_data != 0
            defor_dens = np.zeros(len(risk_data))
            defor_dens[~has_class] = -9999.0

            non_zero_risk_classes = risk_data[has_class]
            defor_dens_values = pd.Series(non_zero_risk_classes).map(dens_by_cat)

            # Classes absent from the rate table map to NaN. risk_data is a
            # NumPy array, so distinct values come from np.unique(); arrays
            # have no .unique() method.
            is_missing = defor_dens_values.isna().to_numpy()
            if is_missing.any():
                missing = np.unique(non_zero_risk_classes[is_missing])
                print(
                    f"Warning: Missing deforestation density for risk classes: {missing}"
                )

            # Fill missing with 0 (or -9999, or raise an error)
            defor_dens_values = defor_dens_values.fillna(0.0)

            defor_dens[has_class] = defor_dens_values.values
            defor_dens = defor_dens.reshape((ny[py], nx[px]), order="C")
            # Write deforestation densities
            ddm_b.WriteArray(defor_dens, x[px], y[py])

        # Compute statistics
        if verbose:
            print("Compute statistics")
        ddm_b.FlushCache()  # Write cache data to disk
        ddm_b.ComputeStatistics(False)

        # Dereference gdal datasets
        riskmap_b = None
        ddm_b = None
        del riskmap_r, ddm_r


In [None]:
import forestatrisk as far


def allocate_deforestation2(
    available_prediction_file,
    available_defrate_file,
    jurisdiction_expected_total_deforestation_ha,
    years_to_forecast,
    project_borders,
):
    """Run allocate_deforestation() with the settings used in this notebook.

    The results go to "defor_project.csv" in the module-level
    ``allocation_folder``; the deforestation density map is always
    requested, blocks have 256 rows and messages are disabled.

    :param available_prediction_file: Risk map raster at the
      jurisdictional level.

    :param available_defrate_file: CSV table of deforestation rates.

    :param jurisdiction_expected_total_deforestation_ha: Expected
      deforestation at the jurisdictional level (in hectares).

    :param years_to_forecast: Length of the forecasting period (in years).

    :param project_borders: Vector file for project borders.

    """
    settings = {
        "riskmap_juris_file": available_prediction_file,
        "defor_rate_tab": available_defrate_file,
        "defor_juris_ha": jurisdiction_expected_total_deforestation_ha,
        "years_forecast": years_to_forecast,
        "project_borders": project_borders,
        "output_file": allocation_folder / "defor_project.csv",
        "defor_density_map": True,
        "blk_rows": 256,
        "verbose": False,
    }
    allocate_deforestation(**settings)


In [None]:
# Run the allocation with the files selected above, using the
# project_borders_repro file as the project borders.
allocate_deforestation2(
    available_prediction_file,
    available_defrate_file,
    jurisdiction_expected_total_deforestation_ha,
    years_to_forecast,
    project_borders_repro,
)
