## Importing the required packages

In [None]:
# System Utilities
import os
import json
from pathlib import Path
from collections import defaultdict

# Earth Engine & Mapping
import ee
import geemap

# Data Handling & Processing
import pandas as pd
import numpy as np
import ipywidgets as widgets
from ipywidgets import Layout
from tqdm.notebook import tqdm

# Visualization & Plotting
import plotly.graph_objects as go
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Set theme for Seaborn visualizations
sns.set_theme()

## 1. Data collection

### 1.1. Set the parameters

#### 1.1.1. Loading the variables for selection

In [None]:
# Parks offered in the "Park" dropdown (one entry per line for easy editing)
parks = [
    'Addo Elephant',
    'Agulhas',
    'Augrabies Falls',
    'Bontebok',
    'Camdeboo',
    'Garden Route',
    'Golden Gate Highlands',
    'Graspan',
    'Groenkloof',
    'Kalahari Gemsbok',
    'Karoo',
    'Kruger',
    'Mapungubwe',
    'Marakele',
    'Mokala',
    'Mountain Zebra',
    'Namaqua',
    'Richtersveld',
    'Table Mountain',
    'Tankwa Karoo',
    'West Coast',
]

# Year options as strings, 2016 through 2023 inclusive
years = list(map(str, range(2016, 2024)))

# One dropdown for the park and one for each end of the year range;
# the defaults are the first park and the widest available range
park_dropdown = widgets.Dropdown(description='Park:', options=parks, value='Addo Elephant')
year_start_dropdown = widgets.Dropdown(description='Start Year:', options=years, value='2016')
year_end_dropdown = widgets.Dropdown(description='End Year:', options=years, value='2023')

# Snapshot of the current widget values; the years are stored as integers
selection = dict(
    zip(
        ("Park", "Starting Year", "Ending Year"),
        (
            park_dropdown.value,
            int(year_start_dropdown.value),
            int(year_end_dropdown.value),
        ),
    )
)

# Observer callback shared by the three dropdowns
def update_selection(change, key):
    """
    Store a dropdown's new value in `selection` and refresh the global `Years` list.

    Parameters:
    - change: Change notification whose `.new` attribute holds the new widget value.
    - key (str): Entry of `selection` to update. "Park" is stored as-is; any other
      key is converted with `int()`.
    """
    global Years

    if key == "Park":
        selection[key] = change.new
    else:
        selection[key] = int(change.new)

    # Nothing to rebuild unless both ends of the year range are known
    if "Starting Year" not in selection or "Ending Year" not in selection:
        return

    first_year = selection["Starting Year"]
    last_year = selection["Ending Year"]
    Years = list(range(first_year, last_year + 1))

# Register one 'value' observer per dropdown; each forwards the change to
# update_selection together with the selection key that dropdown controls
for _dropdown, _key in (
    (park_dropdown, "Park"),
    (year_start_dropdown, "Starting Year"),
    (year_end_dropdown, "Ending Year"),
):
    # `key=_key` freezes the current key so every callback keeps its own value
    _dropdown.observe(lambda change, key=_key: update_selection(change, key), names='value')

#### 1.1.2. Variable parameters (Please select from the list below)

In [None]:
# Please choose parameters from the dropdown list below
# (the three widgets are created in the previous cell; changes made here are
# written into `selection` by the observers attached there)
display(park_dropdown, year_start_dropdown, year_end_dropdown)

#### 1.1.3. Static parameters (Based on selection)

In [None]:
# No need to set any variables
# Calculate the list of years from starting_year to ending_year, inclusive
Years = list(range(selection["Starting Year"], selection["Ending Year"] + 1))
if not Years:
    # A start year after the end year yields an empty range; say so instead of
    # letting the later cells loop over nothing without explanation
    print("⚠️ Warning: the starting year is after the ending year, so no years are selected.")

# Setting the properties for the layer to represent the data on the map and for color coding later on
dw_vis = {"min": 0, "max": 8, "palette": ["#419BDF", "#397D49", "#88B053", "#7A87C6", "#E49635", "#DFC35A", "#C4281B", "#A59B8F", "#B39FE1"]}

# Colour per class, in the same order as `class_labels` below.
# The plotting cells index into `palette` by class position.
palette = dw_vis["palette"]

# Define class labels
class_labels = ['water', 'trees', 'grass', 'flooded_vegetation', 'crops', 'shrub_and_scrub', 'built', 'bare_soil', 'snow_and_ice']

# Get the parent directory (one level up) where config.json is stored
config_path = Path("..") / "config.json"

# Load configuration settings (explicit encoding so the result does not depend on the OS locale)
with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)

# Set base path dynamically from config.json
base_path = Path("..") / config["base_path"]

# Define the different sub-areas
# CPA: Catchment Protected Area, VPA: Viewshed Protected Area, PNA: Priority Natural Areas,
# Parks: Park boundaries itself, Dissolved: read from the "Dissolved" folder
potential_sub_areas = ['CPA', 'VPA', 'PNA', 'Parks', 'Dissolved']

# Keep only the sub-areas whose shapefile <base_path>/<sub_area>/<Park>_<sub_area>.shp exists
sub_areas = [
    sub_area
    for sub_area in potential_sub_areas
    if (base_path / sub_area / f"{selection['Park']}_{sub_area}.shp").exists()
]

### 1.2. Initiate the map (set credentials)

In [None]:
# Automatically authenticate and initialize Earth Engine when creating the map
# Strategy: try to initialize first; if that fails for any reason, run the
# authentication flow once and initialize again.
try:
    ee.Initialize()
except Exception:
    # NOTE(review): every failure of the first attempt is treated as "needs
    # authentication"; other causes would only surface on the second Initialize()
    ee.Authenticate()  # Runs only if authentication is required
    ee.Initialize()

# Initialize and display the interactive map
# `Map` is used by later cells to add layers; the bare expression on the last
# line is what lets the notebook show the widget as the cell output
Map = geemap.Map()
Map

### 1.3. Loading the data (server-side) according to above set parameters

### 1.3.1. Loading the data in Earth Engine

In [None]:
# Build one yearly Dynamic World class composite per sub-area.
# Result layout: results_per_area_and_year[sub_area][year] -> clipped class image
results_per_area_and_year = {}

for sub_area in sub_areas:
    # Construct the path dynamically
    park_sub_shp = base_path / sub_area / f"{selection['Park']}_{sub_area}.shp"

    # Skip (with a warning) when the shapefile is not there
    if not park_sub_shp.exists():
        print(f"⚠️ Warning: Shapefile not found for {sub_area} → {park_sub_shp}")
        continue

    # Convert the shapefile to an Earth Engine object and take its geometry
    park_sub = geemap.shp_to_ee(str(park_sub_shp))
    geometry = park_sub.geometry()

    # Images for this sub-area, keyed by year
    dw_classes_per_year = {}

    for Year in Years:
        start_date = f"{Year}-01-01"
        # Earth Engine date filters treat the end date as exclusive, so the
        # range must end on 1 January of the following year to include
        # 31 December of this one
        end_date = f"{Year + 1}-01-01"

        # Load the DW dataset for the given year and sub-area
        dw_classes = geemap.dynamic_world(
            geometry, start_date, end_date, return_type="class", reducer="mode"
        )

        # Store the clipped image with the year as the key
        dw_classes_per_year[Year] = dw_classes.clip(geometry)

    # Store the results for this sub-area
    results_per_area_and_year[sub_area] = dw_classes_per_year

### 1.3.2. Plotting the data on the map

In [None]:
# Function to plot LULC results (defaults: 'Dissolved' sub-area, latest year)
def plot_lulc(sub_area="Dissolved", year=None):
    """
    Adds the LULC layer for one sub-area and one year to the map.

    Called without arguments it behaves as before: the 'Dissolved' sub-area and
    the latest year in `Years`. Pass `sub_area` and/or `year` to plot something
    else without editing the function.

    Parameters:
    - sub_area (str): Key into `results_per_area_and_year`. Default 'Dissolved'.
    - year (int | None): Year to plot. None selects the latest year in `Years`.
    """
    if year is None:
        # `default=None` keeps an empty `Years` from raising; the lookup below
        # then simply reports that no data is available
        year = max(Years, default=None)

    layer = results_per_area_and_year.get(sub_area, {}).get(year)
    if layer is None:
        print(f"⚠️ No data available for {sub_area} in {year}. Modify the function to select a different sub-area or year.")
        return

    # Last argument False: the layer is added but not shown until toggled on
    Map.addLayer(layer, dw_vis, f"LULC {sub_area} {year}", False)
    print(f"✅ Added LULC layer for {sub_area}, {year} to the map.")

# Plot the default example (Dissolved + latest year)
# The function prints whether a layer was added or no data was available
plot_lulc()

### 1.4. Creating the "Fishnet" (avoid API overload)

#### 1.4.2.1. Load dissolved buffers to get entire extent

In [None]:
# Load the dissolved parks and buffers shapefile (main default)
# NOTE: from here on `parks` is the Earth Engine object built from this
# shapefile; it replaces the list of park names bound to `parks` in the
# dropdown cell.
parks_shp = base_path / "dissolved_all_buffers_FINAL.shp"
parks = geemap.shp_to_ee(str(parks_shp))

# 💡 TIP: If the fishnet does not line up well with the park, try loading the specific park's dissolved shapefile instead
# Uncomment the lines below if needed:
#park_dissolved_shp = base_path / "Dissolved" / f"{selection['Park']}_Dissolved.shp"
#park_dissolved = geemap.shp_to_ee(str(park_dissolved_shp))

#### 1.4.2.2. Create a "Fishnet" from above parameters

In [None]:
# Work out the rectangular extent of the study area, then lay a fishnet grid over it.
# The extent comes from the dissolved parks dataset loaded in the previous cell.
bounding_box = parks.geometry().bounds()
# 💡 TIP: If the dissolved parks dataset does not look correct, try using the alternative below
# bounding_box = park_dissolved.geometry().bounds()  # Uncomment if necessary

# Fetch the bounding box description to the client and take its first
# coordinate list, which holds the corner points of the box
bounding_box_info = bounding_box.getInfo()
coords = bounding_box_info['coordinates'][0]

# Corner 0 is read as (min longitude, min latitude) and corner 2 as
# (max longitude, max latitude)
(min_lon, min_lat), (max_lon, max_lat) = coords[0], coords[2]

# Earth Engine BBox geometry built from the two corners
region = ee.Geometry.BBox(min_lon, min_lat, max_lon, max_lat)

# Fishnet (grid) over the region; h_interval / v_interval are the horizontal and
# vertical spacing of the grid (in degrees)
# 💡 TIP: 1.0 works well, but both can be increased/decreased to balance API load against performance
fishnet = geemap.fishnet(region, v_interval=1.0, h_interval=1.0)

#### 1.4.2.3. Pre-calculate the sub-area and fishnet intersections (windows)

In [None]:
# Pre-compute, for every sub-area, the pieces ("windows") obtained by
# intersecting each fishnet cell with the sub-area geometry.
# Result layout: window_geometries_per_sub_area[sub_area] -> list of ee.Geometry
window_geometries_per_sub_area = {}

for sub_area in sub_areas:
    # Construct the dynamic path for the shapefile
    park_sub_shp = base_path / sub_area / f"{selection['Park']}_{sub_area}.shp"

    if not park_sub_shp.exists():
        print(f"⚠️ Shapefile not found: {park_sub_shp}")
        continue

    park_sub = geemap.shp_to_ee(str(park_sub_shp))
    geometry = park_sub.geometry()

    def _clip_and_measure(feature):
        """Intersect one fishnet cell with the sub-area and record the piece's area."""
        clipped = ee.Feature(feature).intersection(geometry, ee.ErrorMargin(1))
        return clipped.set('window_area_m2', clipped.geometry().area(ee.ErrorMargin(1)))

    # The area is computed and filtered on the server, so a single getInfo()
    # below replaces one blocking round trip per fishnet cell.
    # Only pieces larger than 1 m² are kept.
    intersected_features = fishnet.map(_clip_and_measure).filter(
        ee.Filter.gt('window_area_m2', 1)
    )

    try:
        feature_list = intersected_features.getInfo()['features']
        window_geometries = [ee.Geometry(feature['geometry']) for feature in feature_list]
    except Exception as e:
        # Best effort: report the problem and move on to the next sub-area.
        # No entry is stored for this sub-area.
        print(f"⚠️ Error processing {sub_area}: {e}")
        continue

    # Store the window geometries for this sub-area
    window_geometries_per_sub_area[sub_area] = window_geometries
    print(f"✅ Processed {sub_area}: {len(window_geometries)} window(s) generated.")

### 1.5. Calculate the number of pixels per class

###### ⚠️ NOTE: This is the most time-consuming part of the script (how long it takes depends on the size of the park's dissolved area), but once all the data has been fetched from the server, plotting can begin and is quick. ⚠️

In [None]:
# Define base directory for output CSV files dynamically
output_base_path = Path("..") / "data" / "DW_datasets" / selection["Park"]
output_base_path.mkdir(parents=True, exist_ok=True)  # Ensure directory exists

# Histogram keys are looked up as strings ("0", "1", ...); map them to class
# names once instead of rebuilding the mapping for every year
mapped_keys = {str(i): label for i, label in enumerate(class_labels)}


def _label_histogram(image, window_geometry):
    """
    Return the frequency histogram of `image`'s 'label_mode' values inside one window.

    One request is sent per call. A missing or null histogram (for example a
    window without any valid pixels) is returned as an empty dict so callers
    can always iterate over the result.
    """
    stats = image.reduceRegion(
        reducer=ee.Reducer.frequencyHistogram(),
        geometry=window_geometry,
        scale=10,  # Adjust scale based on dataset resolution
        maxPixels=1e10
    ).getInfo()
    return stats.get('label_mode') or {}


# Process LULC data per sub-area per year
for sub_area in sub_areas:
    # Both inputs come from earlier cells; either can be missing if that cell
    # skipped or failed for this sub-area
    window_geometries = window_geometries_per_sub_area.get(sub_area)
    yearly_images = results_per_area_and_year.get(sub_area)
    if window_geometries is None or yearly_images is None:
        print(f"⚠️ Skipping {sub_area}: window geometries or yearly LULC images are missing.")
        continue

    all_years_data = []

    # Iterate through each year's data for the current sub-area
    for year, dw_class in tqdm(yearly_images.items(), desc=f"Processing Years for {sub_area}"):
        aggregated_pixel_counts = defaultdict(int)

        # Windows are processed one after another; counts are summed across them
        for window_geometry in tqdm(window_geometries, desc=f"Processing Windows for Year {year}"):
            for key, count in _label_histogram(dw_class, window_geometry).items():
                aggregated_pixel_counts[key] += count

        # Replace numeric class keys with class names (unknown keys are kept as-is)
        pixel_counts_formatted = {mapped_keys.get(key, key): value for key, value in aggregated_pixel_counts.items()}

        all_years_data.append({'Year': year, **pixel_counts_formatted})

    if not all_years_data:
        # Without rows there is no 'Year' column to index on
        print(f"⚠️ Skipping {sub_area}: no yearly data to save.")
        continue

    # Convert results into a DataFrame
    df = pd.DataFrame(all_years_data)
    df.set_index('Year', inplace=True)

    # Define the output filename dynamically
    filename = output_base_path / f"{selection['Park']}_{sub_area}_LULC_from_{selection['Starting Year']}_to_{selection['Ending Year']}.csv"

    # Save DataFrame to CSV
    df.to_csv(filename, index=True)
    print(f"✅ Data saved: {filename}")

## 3. Data collection of LULC CHANGES over time

### 3.1. Collecting & calculating the LULC change (server-side)

In [None]:
# Build, per sub-area, one "transition" image for every pair of consecutive years.
# Result layout: results_per_area_and_year_pairs[sub_area]["<pre>-<post>"] -> image
#
# Only the yearly images computed earlier are needed here, so the sub-area
# shapefile is not read and converted again (its geometry was never used).
results_per_area_and_year_pairs = {}

for sub_area in sub_areas:
    # Retrieve already calculated LULC classifications per year
    dw_classes_per_year = results_per_area_and_year.get(sub_area, {})

    # Ensure we have at least two years of data to compute changes
    if len(dw_classes_per_year) < 2:
        print(f"⚠️ Not enough data for {sub_area} to compute LULC changes.")
        continue

    # Compute LULC changes for each pair of consecutive years within this sub-area
    dw_classes_per_year_pairs = {}
    for pre_year, post_year in zip(Years, Years[1:]):
        # Both years must be present in the dataset
        if pre_year not in dw_classes_per_year or post_year not in dw_classes_per_year:
            continue

        image_pre = dw_classes_per_year[pre_year].select('label_mode')
        image_post = dw_classes_per_year[post_year].select('label_mode')

        # Encode the transition as pre * 10 + post, i.e. the tens digit is the
        # earlier class and the units digit is the later class
        combined = image_pre.multiply(10).add(image_post)

        # Store the combined LULC change image for this year pair
        dw_classes_per_year_pairs[f"{pre_year}-{post_year}"] = combined

    # Store the LULC change results for this sub-area
    results_per_area_and_year_pairs[sub_area] = dw_classes_per_year_pairs
    print(f"✅ Processed LULC changes for {sub_area}: {len(dw_classes_per_year_pairs)} year-pair transitions.")

### 3.2. Populating a csv with the LULC change data

###### ⚠️ NOTE: This is an equally time-consuming part of the script (how long it takes depends on the size of the park's dissolved area), but once all the data has been fetched from the server, plotting can begin and is quick. ⚠️

In [None]:
# Transition code -> "<from>_to_<to>" label, built once for all sub-areas and
# year pairs. Codes follow the pre * 10 + post encoding of the change images.
transition_label_map = {
    str(i * 10 + j): f"{from_label}_to_{to_label}"
    for i, from_label in enumerate(class_labels)
    for j, to_label in enumerate(class_labels)
}


def _transition_histogram(image, window_geometry):
    """
    Return the frequency histogram of `image`'s 'label_mode' values inside one window.

    One request is sent per call. A missing or null histogram is returned as an
    empty dict so callers can always iterate over the result.
    """
    stats = image.reduceRegion(
        reducer=ee.Reducer.frequencyHistogram(),
        geometry=window_geometry,
        scale=10,
        maxPixels=1e10
    ).getInfo()
    return stats.get('label_mode') or {}


# Process LULC transitions per sub-area
for sub_area in sub_areas:
    # Both inputs come from earlier cells; either can be missing if that cell
    # skipped or failed for this sub-area
    window_geometries = window_geometries_per_sub_area.get(sub_area)
    year_pair_images = results_per_area_and_year_pairs.get(sub_area)
    if window_geometries is None or year_pair_images is None:
        print(f"⚠️ Skipping {sub_area}: window geometries or LULC change images are missing.")
        continue

    yearly_transition_counts = {}

    # Iterate through each year pair's data for the current sub-area
    for year_pair, dw_class in tqdm(year_pair_images.items(), desc=f"Processing Year Pairs for {sub_area}"):
        year_pair_transition_counts = defaultdict(int)
        pre_year, post_year = year_pair.split('-')

        # Windows are processed one after another; counts are summed across them
        for window_geometry in tqdm(window_geometries, desc=f"Processing Windows for Year Pair {year_pair}"):
            for combined_value, count in _transition_histogram(dw_class, window_geometry).items():
                # Codes without a known label are kept under "Unknown_<code>"
                transition_label = transition_label_map.get(str(combined_value), f"Unknown_{combined_value}")
                year_pair_transition_counts[transition_label] += count

        # Store the aggregated counts for the current year transition
        yearly_transition_counts[f"{pre_year}_to_{post_year}"] = year_pair_transition_counts

    # Convert the dictionary to a DataFrame for CSV export:
    # one row per transition label, one column per year pair
    df_transitions = pd.DataFrame(yearly_transition_counts).fillna(0).reset_index().rename(columns={'index': 'Change'})

    # Ensure proper ordering of columns based on transition years
    ordered_cols = ['Change'] + sorted(df_transitions.columns[1:], key=lambda x: int(x.split('_to_')[0]) if x.split('_to_')[0].isdigit() else float('inf'))
    df_transitions = df_transitions[ordered_cols]

    # Define dynamic output filename
    csv_file_path = output_base_path / f"{selection['Park']}_{sub_area}_LULC_change_from_{selection['Starting Year']}_to_{selection['Ending Year']}.csv"

    # Save DataFrame to CSV
    df_transitions.to_csv(csv_file_path, index=False)
    print(f"✅ Data saved: {csv_file_path}")

# 2. Plotting the data collected above

### 2.1. Characterize the buffer zones

In [None]:
# Read a yearly LULC CSV, tolerating a missing file
def load_csv(file_path):
    """
    Return the CSV at `file_path` as a DataFrame indexed by its "Year" column.

    If the file does not exist, a warning is printed and None is returned
    instead of raising.
    """
    if file_path.exists():
        return pd.read_csv(file_path, index_col="Year")

    print(f"Warning: File not found - {file_path}")
    return None

# Draw one line per LULC class on an existing axes
def plot_line_chart(ax, df, valid_labels, linestyle, label_prefix=""):
    """
    Plot every class in `valid_labels` that is also a column of `df`.

    Each line uses the DataFrame index as x values, round markers, the given
    `linestyle`, the class colour from the global `palette` (looked up by the
    class position in `class_labels`) and the legend label
    `label_prefix + class name`. Classes without a column are skipped.
    """
    present = (name for name in valid_labels if name in df.columns)
    for name in present:
        line_color = palette[class_labels.index(name)]
        ax.plot(
            df.index,
            df[name],
            marker="o",
            linestyle=linestyle,
            label=f"{label_prefix}{name}",
            color=line_color,
        )

# Function to add pie charts
def add_pie_charts(fig, ax, dissolved_df, valid_labels):
    """
    Adds one small pie chart per year underneath the line chart.

    Each pie shows the class composition of that year's row of `dissolved_df`
    and is centred horizontally on the year's position on the x-axis of `ax`.

    Parameters:
    - fig: Figure that receives the pie axes.
    - ax: Axes of the line chart; its data transform gives the x position of each year.
    - dissolved_df (pd.DataFrame): One row per year (index) and one column per class.
    - valid_labels (list): Class names to include. Names that are not columns of
      `dissolved_df` are ignored, since the list may also contain classes that
      only occur in another dataset.
    """
    relative_pie_size = 0.15  # pie width/height as a fraction of the figure

    # Only classes that actually have a column can be read from the DataFrame
    pie_labels = [label for label in valid_labels if label in dissolved_df.columns]
    if not pie_labels:
        return
    pie_colors = [palette[class_labels.index(label)] for label in pie_labels]

    # Data coordinates -> figure fraction; the same transform serves every year
    data_to_figure = ax.transData + fig.transFigure.inverted()

    for year in dissolved_df.index:
        pie_data = dissolved_df.loc[year, pie_labels]

        # A year whose counts are all zero has no composition to draw
        if not pie_data.sum() > 0:
            continue

        # Position the pie at the year itself rather than at the i-th tick, so
        # the placement does not depend on how the axis ticks were set up
        pie_x_fig, _ = data_to_figure.transform((year, 0))
        pie_rect = [pie_x_fig - relative_pie_size / 2, 0.1, relative_pie_size, relative_pie_size]

        ax_pie = fig.add_axes(pie_rect, frameon=False)
        ax_pie.pie(pie_data, colors=pie_colors, startangle=90)

In [None]:
# Locate the two yearly LULC CSVs written by the data-collection step
park_dir = Path("..") / "data" / "DW_datasets" / selection["Park"]
dissolved_file = park_dir / f"{selection['Park']}_Dissolved_LULC_from_{selection['Starting Year']}_to_{selection['Ending Year']}.csv"
parks_file = park_dir / f"{selection['Park']}_Parks_LULC_from_{selection['Starting Year']}_to_{selection['Ending Year']}.csv"

# load_csv returns None (after printing a warning) for a missing file
dissolved_df, parks_df = load_csv(dissolved_file), load_csv(parks_file)

if dissolved_df is not None and parks_df is not None:
    # Missing counts are treated as zero pixels
    dissolved_df, parks_df = dissolved_df.fillna(0), parks_df.fillna(0)

    # Classes that occur in at least one of the two datasets, in class_labels order
    known_columns = set(dissolved_df.columns) | set(parks_df.columns)
    valid_class_labels = [label for label in class_labels if label in known_columns]

    fig, ax = plt.subplots(figsize=(15, 10))

    # Solid lines: dissolved buffer zone; dashed lines with "Park " prefix: park itself
    plot_line_chart(ax, dissolved_df, valid_class_labels, linestyle="-")
    plot_line_chart(ax, parks_df, valid_class_labels, linestyle="--", label_prefix="Park ")

    # Titles, axes and legend
    ax.set_title(f"LULC for {selection['Park']}'s park vs. dissolved buffer zone from {selection['Starting Year']} to {selection['Ending Year']}", fontsize=18)
    ax.set_xlabel("Year")
    ax.set_ylabel("Pixel count")
    ax.set_yscale("log")
    ax.legend(title="Class", bbox_to_anchor=(1.05, 1), loc="upper left")

    # One tick per year of the dissolved dataset, rotated for readability
    ax.set_xticks(dissolved_df.index)
    ax.tick_params(axis="x", rotation=45)
    ax.grid(True, which="both", linestyle="--")

    # Yearly composition pies underneath the chart
    add_pie_charts(fig, ax, dissolved_df, valid_class_labels)

    # Leave room at the bottom for the pies, then render
    plt.subplots_adjust(bottom=0.3)
    plt.show()
else:
    print("Error: One or more required files are missing. Please check the dataset directory.")

### 2.2. Plotting the normalized yearly difference

In [None]:
# Year-over-year change per column, scaled to the range [-1, 1]
def calculate_normalized_diff(df):
    """
    Return the row-to-row differences of `df`, each column divided by the
    largest absolute difference found in that column.

    The first row (which has no predecessor) is 0. Columns whose differences
    are all zero are divided by 1, so they stay 0 instead of becoming NaN.
    """
    yearly_change = df.diff().fillna(0)

    # Per-column scale; a scale of 0 is swapped for 1 to avoid dividing by zero
    scale = yearly_change.abs().max()
    scale = scale.mask(scale == 0, 1)

    return yearly_change.div(scale, axis="columns")

In [None]:
# Compute normalized differences
dissolved_diff_normalized = calculate_normalized_diff(dissolved_df)
parks_diff_normalized = calculate_normalized_diff(parks_df)

# Identify available classes present in both datasets
available_classes = [label for label in class_labels if label in dissolved_diff_normalized.columns and label in parks_diff_normalized.columns]
num_classes = len(available_classes)

if num_classes == 0:
    # A grid with zero rows cannot be created, so stop here with a message
    print("⚠️ No LULC classes are common to both datasets; nothing to plot.")
else:
    # Dynamically determine subplot layout
    ncols = 3
    nrows = (num_classes + ncols - 1) // ncols  # Round up to ensure all classes fit

    # Create subplots; squeeze=False always yields a 2-D array, which flattens
    # the same way for any grid size
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 12), squeeze=False)
    axes = axes.flatten()

    # Bar width and x positions are the same for every class
    width = 0.35
    x = np.arange(len(dissolved_diff_normalized.index))

    for ax, column in zip(axes, available_classes):
        # Get color from palette
        color = palette[class_labels.index(column)]

        # Filled bars: dissolved buffer zones
        ax.bar(x - width/2, dissolved_diff_normalized[column], width, label=f'Dissolved {column}', color=color)

        # Hollow, hatched bars with dashed edge: parks
        ax.bar(x + width/2, parks_diff_normalized[column], width, label=f'Park {column}',
               color='none', edgecolor=color, linestyle='--', hatch='//')

        # Axis customization
        ax.set_title(column)
        ax.set_xticks(x)
        ax.set_xticklabels(dissolved_diff_normalized.index, rotation=45)
        ax.set_ylim(-1.1, 1.1)  # Values are normalized to [-1, 1]; leave a small margin
        ax.legend()
        ax.grid(True, which="both", linestyle="--")

    # Remove the unused trailing subplots (the grid is a multiple of ncols)
    for unused_ax in axes[num_classes:]:
        fig.delaxes(unused_ax)

    # Global title and layout adjustments
    plt.suptitle(f"Normalized Yearly Difference in LULC for {selection['Park']}'s Park vs. Dissolved Buffer Zone "
                 f"from {selection['Starting Year']} to {selection['Ending Year']}", fontsize=18)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

### 2.2. Plotting the LULCC on a Sankey diagram

#### 2.2.1. Prepare the data in Sankey format

In [None]:
# Load the LULC change table of the dissolved buffer zones.
# The file is located relative to the repository (the same place the
# data-collection step writes to) and the park / year range come from
# `selection`, so the cell runs on any machine.
starting_year = selection["Starting Year"]
ending_year = selection["Ending Year"]
park_dir = Path("..") / "data" / "DW_datasets" / selection["Park"]
dissolved_change_df = pd.read_csv(
    park_dir / f"{selection['Park']}_Dissolved_LULC_change_from_{starting_year}_to_{ending_year}.csv",
    index_col='Change',
)

# Initialize lists to store the source, target, and values for the Sankey diagram
sources = []
targets = []
values = []

# Number of class labels
num_labels = len(class_labels)

# Generate sources, targets, and values from the DataFrame.
# Columns are named "<from_year>_to_<to_year>", rows "<from_class>_to_<to_class>".
# Node index = (year - starting_year) * num_labels + position of the class.
for col in dissolved_change_df.columns:
    from_year, to_year = (int(part) for part in col.split('_to_'))
    for index, value in dissolved_change_df[col].items():
        if not value > 0:  # Only create a link if there's a non-zero value
            continue

        from_class, _, to_class = index.partition('_to_')
        # Rows that do not name two known classes (e.g. "Unknown_<code>") have no node
        if from_class not in class_labels or to_class not in class_labels:
            continue

        sources.append((from_year - starting_year) * num_labels + class_labels.index(from_class))
        targets.append((to_year - starting_year) * num_labels + class_labels.index(to_class))
        values.append(value)

# Define node labels and colors (repeated for each year)
node_labels = [f'{label}' for _ in range(starting_year, ending_year + 1) for label in class_labels]
node_colors = palette * (ending_year - starting_year + 1)  # Ensure each node has a color

In [None]:
def load_dissolved_change_data(selection):
    """
    Read the LULC change CSV of the dissolved buffer zone.

    The file is looked up at
    ../data/DW_datasets/<Park>/<Park>_Dissolved_LULC_change_from_<start>_to_<end>.csv

    Parameters:
    - selection (dict): Contains 'Park', 'Starting Year', and 'Ending Year'.

    Returns:
    - pd.DataFrame: The CSV contents, indexed by the 'Change' column.

    Raises:
    - FileNotFoundError: If the CSV does not exist.
    """
    park_dir = Path("..", "data", "DW_datasets", selection["Park"])
    change_file = park_dir.joinpath(
        f"{selection['Park']}_Dissolved_LULC_change_from_{selection['Starting Year']}_to_{selection['Ending Year']}.csv"
    )

    if change_file.exists():
        return pd.read_csv(change_file, index_col="Change")

    raise FileNotFoundError(f"Error: File not found - {change_file}")

def process_sankey_data(dissolved_change_df, selection, class_labels, palette):
    """
    Turn an LULC change table into the link and node lists of a Sankey diagram.

    Columns of the table are named "<from_year>_to_<to_year>" and rows
    "<from_class>_to_<to_class>". Every positive cell becomes one link. A node
    index is (year - starting year) * number of classes + position of the class
    in `class_labels`. Columns or rows whose name does not split into exactly
    two parts are reported and skipped; rows naming an unknown class are
    skipped silently.

    Parameters:
    - dissolved_change_df (pd.DataFrame): DataFrame containing LULC changes.
    - selection (dict): Contains 'Park', 'Starting Year', and 'Ending Year'.
    - class_labels (list): List of LULC class names.
    - palette (list): List of colors corresponding to class labels.

    Returns:
    - sources (list): Source node index of each link.
    - targets (list): Target node index of each link.
    - values (list): Magnitude of each link.
    - node_labels (list): Class labels, repeated once per year.
    - node_colors (list): Palette, repeated once per year.
    """
    sources, targets, values = [], [], []
    num_labels = len(class_labels)

    for col in dissolved_change_df.columns:
        # Column name -> (from_year, to_year)
        try:
            from_year, to_year = (int(part) for part in col.split("_to_"))
        except ValueError:
            print(f"Skipping column {col}: Invalid format.")
            continue

        for index, value in dissolved_change_df[col].items():
            # Zero (or non-comparable, e.g. NaN) cells produce no link
            if not value > 0:
                continue

            # Row name -> (from_class, to_class)
            try:
                from_class, to_class = index.split("_to_")
            except ValueError:
                print(f"Skipping row {index}: Invalid format.")
                continue

            if from_class not in class_labels or to_class not in class_labels:
                continue

            source_index = (from_year - selection["Starting Year"]) * num_labels + class_labels.index(from_class)
            target_index = (to_year - selection["Starting Year"]) * num_labels + class_labels.index(to_class)

            sources.append(source_index)
            targets.append(target_index)
            values.append(value)

    # One block of labels/colours per year in the selected range
    num_years = selection["Ending Year"] - selection["Starting Year"] + 1
    node_labels = [f"{label}" for label in class_labels] * num_years
    node_colors = palette * num_years

    return sources, targets, values, node_labels, node_colors

In [None]:
# Load the data
# (raises FileNotFoundError if the change CSV for the current selection is missing)
dissolved_change_df = load_dissolved_change_data(selection)

# Process the data for the Sankey diagram
# The five results are the link lists (sources, targets, values) and the
# per-node labels and colours expected by plot_sankey_diagram
sources, targets, values, node_labels, node_colors = process_sankey_data(dissolved_change_df, selection, class_labels, palette)

#### 2.2.2. Plot the Sankey-diagrams

In [None]:
def plot_sankey_diagram(sources, targets, values, node_labels, node_colors, selection):
    """
    Generates and shows a Sankey diagram for visualizing LULC change over time.

    Parameters:
    - sources (list): List of source node indices.
    - targets (list): List of target node indices.
    - values (list): List of flow values.
    - node_labels (list): Labels for each node.
    - node_colors (list): Colors for each node.
    - selection (dict): Contains 'Park', 'Starting Year', and 'Ending Year'.

    Raises:
    - ValueError: If sources, targets and values differ in length, or if the
      number of node labels differs from the number of node colors.
    """
    if not (len(sources) == len(targets) == len(values)):
        raise ValueError("Mismatch in the length of sources, targets, and values.")

    if len(node_labels) != len(node_colors):
        raise ValueError("Mismatch between the number of node labels and node colors.")

    # Create Sankey Diagram
    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=node_labels,
            color=node_colors
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values
        )
    )])

    # Calculate proportional positions for year annotations along the x-axis:
    # the first year sits at 0, the last at 1, the others evenly in between
    starting_year = selection["Starting Year"]
    ending_year = selection["Ending Year"]
    year_span = ending_year - starting_year

    year_positions = {
        # With a single year there is no span to divide by; centre its label
        year: (year - starting_year) / year_span if year_span else 0.5
        for year in range(starting_year, ending_year + 1)
    }

    # Update layout with title and year annotations
    fig.update_layout(
        title=dict(
            text=f"LULC Change for {selection['Park']}'s Dissolved Buffer Zone from {starting_year} to {ending_year}",
            font=dict(size=20)
        ),
        font=dict(size=10),
        annotations=[
            dict(
                showarrow=False,
                text=str(year),
                xref="paper",
                yref="paper",
                x=year_positions[year],
                y=-0.1,  # just below the plotting area
                align="center",
                font=dict(size=10)
            ) for year in range(starting_year, ending_year + 1)
        ],
        height=600
    )

    # Show the Sankey diagram
    fig.show()

In [None]:
# Plot the Sankey diagram
# (uses the link and node lists prepared in the cell above)
plot_sankey_diagram(sources, targets, values, node_labels, node_colors, selection)

# 3. Calculate the LULCC intensity

### 3.1. Choose sub-area for further investigation

In [None]:
# Ensure sub_areas is defined
if 'sub_areas' not in globals():
    raise ValueError("Error: 'sub_areas' list is not defined. Please ensure it's available before running this cell.")

if not sub_areas:
    print("⚠️ No sub-areas with an available shapefile were found; the dropdown below is empty.")

# Define the Dropdown Widget
sub_area_dropdown = widgets.Dropdown(
    options=sub_areas,
    # Default to the first option; with no options the only valid value is None
    value=sub_areas[0] if sub_areas else None,
    description='Buffer sub-area for further investigation:',
    # Let the label take the width it needs instead of being cut off
    style={'description_width': 'initial'},
    disabled=False
)

# Define an Update Function
def inv_sub_area_change(change):
    """
    Reports the newly selected buffer sub-area when the dropdown value changes.

    Parameters:
    - change (dict): Contains old and new values for the widget.
    """
    print(f"Buffer sub-area to investigate changed to: {change['new']}")

# Attach the Update Function to the Dropdown
sub_area_dropdown.observe(inv_sub_area_change, names='value')

# Print guidance for users
print(
    "💡 TIP: Start with 'Dissolved' and then explore other options as needed.\n\n"
    "Abbreviations:\n"
    "- CPA: Catchment Protected Area\n"
    "- VPA: Viewshed Protected Area\n"
    "- PNA: Priority Natural Areas\n"
    "- Parks: Park boundaries themselves"
)

# Display the dropdown in a Jupyter notebook
display(sub_area_dropdown)

### 3.2. Prepare data in "cross-tabulation matrix" format

In [None]:
def load_sub_area_data(selection, sub_area):
    """
    Read the LULC change table stored for one sub-area of the selected park.

    The CSV is looked up under ``../data/DW_datasets/<Park>/``; its file name is
    built from the park, the sub-area and the start/end years in ``selection``.

    Parameters:
    - selection (dict): Must provide 'Park', 'Starting Year' and 'Ending Year'.
    - sub_area (str): Buffer sub-area whose table should be read.

    Returns:
    - pd.DataFrame: The CSV contents; always contains a 'Change' column.

    Raises:
    - FileNotFoundError: If the expected CSV does not exist.
    - KeyError: If the CSV has no 'Change' column.
    """
    park = selection["Park"]
    first_year = selection["Starting Year"]
    last_year = selection["Ending Year"]

    # Location of the table relative to the notebook's working directory
    csv_name = f"{park}_{sub_area}_LULC_change_from_{first_year}_to_{last_year}.csv"
    csv_path = Path("..", "data", "DW_datasets", park) / csv_name

    if not csv_path.exists():
        raise FileNotFoundError(f"Error: File not found - {csv_path}")

    table = pd.read_csv(csv_path)

    # Downstream processing splits the 'Change' column, so it must be present
    if "Change" in table.columns:
        return table

    raise KeyError(f"Error: Column 'Change' not found in {csv_path}")

def compute_transition_matrices(df):
    """
    Build one cross-tabulation (transition) matrix per interval column of ``df``.

    Every column other than 'Change', 'from' and 'to' is treated as one interval.
    For each of them the 'Change' labels ('<from>_to_<to>') are pivoted into a
    square table with 'from' classes as rows and 'to' classes as columns, and
    summary entries are added:
    - rows 'Final total' and 'Gross gain' (per 'to' class),
    - columns 'Initial total' and 'Gross loss' (per 'from' class).
    The row sums are taken over every row, so the two summary rows also receive
    'Initial total' / 'Gross loss' entries.

    Note: the 'from' and 'to' columns are written onto ``df`` itself.

    Parameters:
    - df (pd.DataFrame): Table with a 'Change' column and one column per interval.

    Returns:
    - transition_matrices (dict): Interval column name -> transition matrix (pd.DataFrame).

    Raises:
    - KeyError: If ``df`` has no 'Change' column.
    """
    if "Change" not in df.columns:
        raise KeyError("Error: 'Change' column is missing in the dataset.")

    # 'a_to_b' -> origin 'a', destination 'b'
    df[['from', 'to']] = df['Change'].str.split('_to_', expand=True)

    def diagonal_or_zero(table):
        """For each row label, the value at [label, label]; 0 where no such cell exists."""
        return [
            table.at[label, label] if label in table.columns and label in table.index else 0
            for label in table.index
        ]

    interval_columns = [name for name in df.columns if name not in ["Change", "from", "to"]]
    transition_matrices = {}

    for interval in interval_columns:
        # Rows are origin classes, columns are destination classes
        crosstab = df.pivot(index='from', columns='to', values=interval).fillna(0)

        # Make the table square over every class seen on either side
        classes = sorted(set(crosstab.index).union(crosstab.columns))
        crosstab = crosstab.reindex(index=classes, columns=classes, fill_value=0)

        # Destination-oriented view: each row sum is the class's final area,
        # and final area minus the persisting (diagonal) area is its gross gain
        by_destination = crosstab.T
        by_destination['Final total'] = by_destination.sum(axis=1)
        by_destination['Gross gain'] = by_destination['Final total'] - diagonal_or_zero(by_destination)

        # Origin-oriented view: same bookkeeping for initial area and gross loss
        by_origin = by_destination.T
        by_origin['Initial total'] = by_origin.sum(axis=1)
        by_origin['Gross loss'] = by_origin['Initial total'] - diagonal_or_zero(by_origin)

        transition_matrices[interval] = by_origin

    return transition_matrices

In [None]:
# Read the LULC change table for the current park/year selection and the
# sub-area currently chosen in the dropdown
df = load_sub_area_data(selection, sub_area_dropdown.value)

# Build one cross-tabulation matrix per interval column, keyed by column name
# (this call also adds 'from' and 'to' columns to df)
transition_matrices = compute_transition_matrices(df)

### 3.3. Calculate the "Time Intensity" of LULCC

#### 3.3.1. "Time Intensity" calculation (St & U)

In [None]:
# Outputs of the per-interval pass: interval labels and their annual change rates
time_intervals = []
annual_rates_of_change = []

# Time intensity per interval: changed area as a percent of the study area, per year
for year_interval, matrix in transition_matrices.items():
    try:
        # Both summary rows are required; intervals without them are skipped
        if not ('Final total' in matrix.index and 'Gross gain' in matrix.index):
            print(f"⚠️ Warning: 'Final total' or 'Gross gain' is missing in {year_interval}, skipping.")
            continue

        # Whole-row sums. For matrices built by compute_transition_matrices the rows also
        # carry 'Initial total' and 'Gross loss' entries; these scale numerator and
        # denominator alike, so the ratio below is unaffected
        total_area_of_change = matrix.loc['Gross gain'].sum()
        total_area_of_study_region = matrix.loc['Final total'].sum()

        # Every interval is treated as spanning exactly one year
        duration_of_interval = 1
        time_intensity = (total_area_of_change / total_area_of_study_region) / duration_of_interval * 100

    except KeyError as e:
        print(f"⚠️ Warning: Unexpected missing data in transition matrix for {year_interval}: {e}")
        continue

    else:
        # Keep label and rate in lockstep so they can be zipped later
        time_intervals.append(year_interval)
        annual_rates_of_change.append(time_intensity)

# Uniform intensity: change summed over all intervals, relative to the study area
# and to the number of processed intervals
try:
    valid_matrices = [candidate for candidate in transition_matrices.values() if 'Final total' in candidate.index]

    if len(valid_matrices) == 0:
        raise ValueError("No valid transition matrices found with 'Final total' row.")

    total_change_over_all_intervals = sum(candidate.loc['Gross gain'].sum() for candidate in valid_matrices)
    # The study area is taken from the first usable matrix
    total_study_area = valid_matrices[0].loc['Final total'].sum()
    total_time = len(time_intervals)
    uniform_intensity = (total_change_over_all_intervals / total_study_area) / total_time * 100

except (KeyError, ValueError) as e:
    print(f"⚠️ Warning: Error while calculating uniform intensity: {e}")
    uniform_intensity = None  # Signals "not available" to the cells below

# Intervals that changed faster than the uniform rate; none when no uniform rate exists
if uniform_intensity is None:
    significant_intervals = []
else:
    significant_intervals = [
        interval
        for interval, rate in zip(time_intervals, annual_rates_of_change)
        if rate > uniform_intensity
    ]

print("📊 Significant Intervals:", significant_intervals)

#### 3.3.2. "Time Intensity" plots

In [None]:
def plot_time_intensity_analysis(time_intervals, annual_rates_of_change, uniform_intensity, selection, sub_area):
    """
    Plots the annual rates of LULCC change over time and compares them with uniform intensity.

    Each interval is drawn as a horizontal bar; the uniform intensity is drawn as a
    red dashed vertical line with 'Slow' to its left and 'Fast' to its right.
    If any of the three data arguments is missing or empty, a message is printed
    and nothing is plotted.

    Parameters:
    - time_intervals (list): List of time periods (e.g., '2016_to_2017').
    - annual_rates_of_change (list): Corresponding annual rates of change (percent values).
    - uniform_intensity (float): The uniform intensity threshold for comparison.
    - selection (dict): Contains 'Park' name for title.
    - sub_area (str): Selected sub-area for investigation.

    Returns:
    - None. Displays a horizontal bar plot of the time intensity analysis.
    """

    # Ensure valid data before plotting
    if not time_intervals or not annual_rates_of_change or uniform_intensity is None:
        print("⚠️ Error: Missing required data for plotting. Ensure time_intervals, annual_rates_of_change, and uniform_intensity are available.")
        return

    # Create the figure
    plt.figure(figsize=(10, 5))

    # Plot annual rates of change as horizontal bars
    plt.barh(time_intervals, annual_rates_of_change, color='gray', edgecolor='black', label="Annual Rate of Change")

    # Plot the uniform intensity threshold as a red dashed line
    plt.axvline(x=uniform_intensity, color='red', linestyle='--', label=f'Uniform Intensity: {uniform_intensity:.2f}%')

    # Anchor the "Slow"/"Fast" labels on the last bar. Bars sit at y = 0 .. n-1 and the
    # guard above guarantees n >= 1, so n-1 is always a valid bar position (a floor of 1
    # would push the labels outside the axes when there is a single interval)
    y_position = len(time_intervals) - 1
    plt.text(uniform_intensity - 0.08, y_position, 'Slow',
             verticalalignment='center', horizontalalignment='right', color='red', fontsize=12)
    plt.text(uniform_intensity + 0.08, y_position, 'Fast',
             verticalalignment='center', horizontalalignment='left', color='red', fontsize=12)

    # Add labels and title
    plt.xlabel('Annual Change Area (percent of map)', fontsize=12)
    plt.ylabel('Time Interval', fontsize=12)
    plt.title(f"LULCC Time Intensity Analysis for {selection['Park']}'s {sub_area} Buffer Zone", fontsize=15)
    plt.legend()

    # Show the plot
    plt.show()

In [None]:
# Draw the per-interval annual change rates against the uniform intensity line
# for the sub-area currently chosen in the dropdown
plot_time_intensity_analysis(time_intervals, annual_rates_of_change, uniform_intensity, selection, sub_area_dropdown.value)

### 3.4. Calculate the "Category Intensity" of LULCC

#### 3.4.1. "Category Intensity" calculation (Gtj & Lti)

###### ⚠️ Note: this version of the code is developed to only analyse those years with St > U identified above. ⚠️

In [None]:
def calculate_category_intensities(significant_intervals, transition_matrices, duration_of_interval=1):
    """
    Computes category gain and loss intensities for each significant time interval.

    For every category (all rows of the matrix except the last two summary rows):
    - loss intensity = gross loss / duration / initial total * 100
    - gain intensity = gross gain / duration / final total * 100
    A category gets no loss (gain) entry when its initial (final) total is not positive.
    Intervals without a transition matrix are skipped with a warning, in line with
    the transition-intensity functions.

    Parameters:
    - significant_intervals (list): List of years with significant LULCC changes.
    - transition_matrices (dict): Dictionary containing transition matrices for each year.
    - duration_of_interval (int): Number of years in the interval (default=1).

    Returns:
    - category_intensities (dict): Interval -> {'loss_intensities': {...}, 'gain_intensities': {...}},
      each inner dictionary mapping category to intensity in percent.
    """
    # Dictionary to store category intensities per year
    category_intensities = {}

    for year in significant_intervals:
        matrix = transition_matrices.get(year)
        if matrix is None:
            print(f"⚠️ Warning: No transition matrix found for {year}, skipping.")
            continue

        loss_intensities = {}
        gain_intensities = {}

        # The last two rows are the 'Final total' and 'Gross gain' summary rows
        for category in matrix.index[:-2]:
            try:
                # Per-category totals live in the summary columns (initial/loss)
                # and in the summary rows (final/gain)
                initial_total = matrix.loc[category, 'Initial total']
                final_total = matrix.loc['Final total', category]
                gross_loss = matrix.loc[category, 'Gross loss']
                gross_gain = matrix.loc['Gross gain', category]
            except KeyError as e:
                print(f"⚠️ Warning: Missing data for category '{category}' in {year}: {e}")
                continue  # Skip problematic categories

            # Only divide by strictly positive totals
            if initial_total > 0:
                loss_intensities[category] = (gross_loss / duration_of_interval) / initial_total * 100

            if final_total > 0:
                gain_intensities[category] = (gross_gain / duration_of_interval) / final_total * 100

        # Store computed intensities for the current year
        category_intensities[year] = {
            'loss_intensities': loss_intensities,
            'gain_intensities': gain_intensities
        }

    return category_intensities

In [None]:
# Compute per-category loss and gain intensities, but only for the intervals
# flagged as significant by the time-intensity step
category_intensities = calculate_category_intensities(significant_intervals, transition_matrices)

#### 3.4.2. "Category Intensity" plots

In [None]:
def plot_category_intensity_analysis(significant_intervals, category_intensities, time_intervals, annual_rates_of_change, selection, sub_area, total_categories=9):
    """
    Plots the LULCC category intensity analysis for significant intervals.

    One figure is drawn per interval: paired horizontal bars (loss and gain intensity)
    for every category, plus a dashed vertical line at that interval's annual rate of
    change, which serves as the uniform line at category level ('Dormant' to its
    left, 'Active' to its right). Intervals missing from ``category_intensities``
    are skipped with a warning.

    Parameters:
    - significant_intervals (list): List of significant years.
    - category_intensities (dict): Dictionary containing per-category gain and loss intensities.
    - time_intervals (list): All time intervals.
    - annual_rates_of_change (list): Corresponding annual rates of change.
    - selection (dict): Contains 'Park' name for title.
    - sub_area (str): Selected sub-area for investigation.
    - total_categories (int): Minimum number of bar slots per figure (default=9). Figures
      with fewer categories are padded with empty slots; figures with more categories
      grow to fit them all.

    Returns:
    - None. Displays category intensity bar plots for each significant interval.

    Raises:
    - ValueError: If a plotted interval is not present in ``time_intervals``.
    """

    for interval in significant_intervals:
        # Ensure the interval exists in category_intensities
        if interval not in category_intensities:
            print(f"⚠️ Warning: No data found for interval {interval}, skipping.")
            continue

        # Get intensity data for the current interval
        data = category_intensities[interval]
        loss_by_category = data['loss_intensities']
        gain_by_category = data['gain_intensities']

        # Categories with a loss intensity first, followed by those that only have a
        # gain intensity, so that no category is dropped
        categories = list(loss_by_category)
        categories += [cat for cat in gain_by_category if cat not in loss_by_category]
        loss_intensities = [loss_by_category.get(cat, 0) for cat in categories]
        gain_intensities = [gain_by_category.get(cat, 0) for cat in categories]

        # Pad up to the requested slot count, but never allocate fewer slots than
        # there are categories (a negative padding would leave the bar positions
        # and the bar values with different lengths)
        slot_count = max(total_categories, len(categories))
        empty_slots = slot_count - len(categories)
        loss_intensities += [0] * empty_slots
        gain_intensities += [0] * empty_slots
        categories += [''] * empty_slots  # Empty strings for missing category labels

        # Find the annual rate of change for this interval
        index = time_intervals.index(interval)
        specific_uniform_intensity = annual_rates_of_change[index]

        # Position of bars on the y-axis
        y_pos = np.arange(slot_count)

        # Create the figure
        plt.figure(figsize=(10, 5))

        # Horizontal bars for losses
        plt.barh(y_pos, loss_intensities, color='tomato', edgecolor='black', height=0.4, label='Loss Intensity')

        # Horizontal bars for gains (slightly offset on the y-axis)
        plt.barh(y_pos + 0.4, gain_intensities, color='mediumseagreen', edgecolor='black', height=0.4, label='Gain Intensity')

        # Draw a dashed line for uniform intensity
        plt.axvline(x=specific_uniform_intensity, color='black', linestyle='--', label=f'Uniform Intensity {specific_uniform_intensity:.2f}%')

        # Add 'Dormant' and 'Active' labels near the uniform intensity line, just above the top slot
        plt.text(specific_uniform_intensity - 0.25, slot_count - 0.2, 'Dormant',
                 verticalalignment='center', horizontalalignment='right', color='black', fontsize=10)
        plt.text(specific_uniform_intensity + 0.5, slot_count - 0.2, 'Active',
                 verticalalignment='center', horizontalalignment='left', color='black', fontsize=10)

        # Labels and title
        plt.xlabel('Annual Change Intensity (percent of category)', fontsize=12)
        plt.title(f"LULCC Category Intensity Analysis for {selection['Park']}'s {sub_area} Buffer Zone ({interval})", fontsize=13)
        plt.yticks(y_pos + 0.2, categories)  # Center y-ticks between the loss and gain bars

        plt.legend()
        plt.show()

In [None]:
# Draw one loss/gain intensity bar chart per significant interval for the
# sub-area currently chosen in the dropdown
plot_category_intensity_analysis(significant_intervals, category_intensities, time_intervals, annual_rates_of_change, selection, sub_area_dropdown.value)

### 3.5. Calculate the "Transition Intensity" of LULCC (Qtmj, Vtm, Rtin and Wtn)

#### 3.5.1. "Transition Intensity" calculation for Qtmj, Vtm -> Loss, transition from target category to all other categories.

In [None]:
def calculate_category_transition_intensities(significant_intervals, transition_matrices, duration_of_interval=1):
    """
    Computes loss-side transition intensities for each significant time interval.

    For every origin category m (rows, excluding the two summary rows) and every
    other destination category j (columns, excluding the two summary columns):
    - cell [m, j] = area moving from m to j / duration / final total of j * 100
    - 'Uniform_Intensity' of m = gross loss of m / duration / (sum of final totals - final total of m) * 100
    Cells whose denominator is not positive, and the diagonal, stay at 0.0.
    Intervals without a transition matrix, or with missing labels, are skipped with a warning.

    Parameters:
    - significant_intervals (list): List of years with significant LULCC changes.
    - transition_matrices (dict): Dictionary containing transition matrices for each year.
    - duration_of_interval (int): Number of years in the interval (default=1).

    Returns:
    - category_transitions (dict): Interval -> float DataFrame (origin categories x
      destination categories plus a trailing 'Uniform_Intensity' column).
    """
    category_transitions = {}

    for year in significant_intervals:
        matrix = transition_matrices.get(year)
        if matrix is None:
            print(f"⚠️ Warning: No transition matrix found for {year}, skipping.")
            continue

        try:
            # Category labels: drop the two summary rows / two summary columns
            origin_categories = matrix.index[:-2]
            destination_categories = matrix.columns[:-2]

            # Total final area over all categories
            final_totals = matrix.loc['Final total'].iloc[:-2]
            total_area = final_totals.sum()

            # Start from float zeros: the cells are filled with float percentages, and
            # writing floats into an integer-typed frame relies on a silent upcast
            transition_intensities = pd.DataFrame(0.0,
                                                  index=origin_categories,
                                                  columns=destination_categories.append(pd.Index(['Uniform_Intensity'])))

            for category_from in origin_categories:
                gross_loss = matrix.loc[category_from, 'Gross loss']
                # Final area of every category other than the losing one
                area_not_m = total_area - matrix.loc['Final total', category_from]

                if area_not_m > 0:  # Avoid division by zero
                    uniform_value = (gross_loss / duration_of_interval) / area_not_m * 100
                    transition_intensities.loc[category_from, 'Uniform_Intensity'] = uniform_value

                for category_to in destination_categories:
                    if category_from == category_to:
                        continue  # Persistence is not a transition

                    transition_area = matrix.loc[category_from, category_to]
                    final_total_category_to = matrix.loc['Final total', category_to]

                    if final_total_category_to > 0:  # Avoid division by zero
                        transition_intensity = (transition_area / duration_of_interval) / final_total_category_to * 100
                        transition_intensities.at[category_from, category_to] = transition_intensity

            # Store computed transition intensities for the current year
            category_transitions[year] = transition_intensities

        except KeyError as e:
            print(f"⚠️ Warning: Missing data in transition matrix for {year}: {e}")
            continue  # Skip problematic years

    return category_transitions

In [None]:
# Compute the loss-side transition intensities (from each category to every
# other category) for the significant intervals
category_transitions = calculate_category_transition_intensities(significant_intervals, transition_matrices)

#### 3.5.2. "Transition Intensity" plots for Qtmj, Vtm -> Loss, transition from target category to all other categories.

In [None]:
def plot_transition_heatmap(significant_intervals, category_transitions, selection, sub_area):
    """
    Draw one green heatmap of transition intensities per significant interval.

    Values are rounded to whole numbers for annotation, the main diagonal is
    masked (shown white), and a black vertical line is drawn at every column edge.
    Intervals missing from ``category_transitions`` are reported and skipped.

    Parameters:
    - significant_intervals (list): Intervals to plot.
    - category_transitions (dict): Interval -> DataFrame of transition intensities.
    - selection (dict): Provides the 'Park' name used in the title.
    - sub_area (str): Sub-area name used in the title.

    Returns:
    - None. Shows one figure per plotted interval.
    """
    for interval in significant_intervals:
        if interval not in category_transitions:
            print(f"⚠️ Warning: No data found for interval {interval}, skipping.")
            continue

        # Whole-number values keep the cell annotations compact
        rounded = category_transitions[interval].round(0)
        n_rows, n_cols = rounded.shape

        # True on the main diagonal only: those cells are hidden
        diagonal_mask = np.eye(n_rows, n_cols, dtype=bool)

        # Work on a private copy of the colormap so masked cells can be drawn white
        palette = mpl.colormaps.get_cmap('Greens').copy()
        palette.set_bad("white")

        plt.figure(figsize=(12, 9))
        sns.heatmap(rounded, mask=diagonal_mask, cmap=palette, annot=True, fmt=".0f", linewidths=0.5)

        # Black separators at every column boundary
        for column_edge in range(n_cols + 1):
            plt.axvline(x=column_edge, color='black', linestyle='-', linewidth=1)

        plt.title(f"LULC Transition Intensity Analysis for {selection['Park']}'s {sub_area} Buffer Zone ({interval})", fontsize=14)
        plt.xlabel('To Category & Uniform Intensity (percent of category)', fontsize=12)
        plt.xticks(rotation=45, ha="right")
        plt.ylabel('From Category (percent of category)', fontsize=12)

        plt.show()

In [None]:
# Draw the loss-side heatmaps (transition from the target category).
# NOTE: `category_transitions` is reassigned by the gain-side calculation cell
# further down, so run this while it still holds the loss-side result
plot_transition_heatmap(significant_intervals, category_transitions, selection, sub_area_dropdown.value)

#### 3.5.3. "Transition Intensity" calculation for Rtin, Wtn -> Gain, transition to target category from all other categories.

In [None]:
def calculate_category_transition_intensities_v2(significant_intervals, transition_matrices, duration_of_interval=1):
    """
    Computes gain-side transition intensities for each significant time interval.

    For every origin category i (rows, excluding the two summary rows) and every
    other destination category n (columns, excluding the two summary columns):
    - cell [i, n] = area moving from i to n / duration / initial total of i * 100
    - 'Uniform_Intensity' of n = gross gain of n / duration / (sum of initial totals - initial total of n) * 100
    Cells whose denominator is not positive, and the diagonal, stay at 0.0.
    The uniform intensities are returned as a trailing 'Uniform_Intensity' row, so each
    value sits under the column of the gaining category it belongs to.
    Intervals without a transition matrix, or with missing labels, are skipped with a warning.

    Parameters:
    - significant_intervals (list): List of years with significant LULCC changes.
    - transition_matrices (dict): Dictionary containing transition matrices for each year.
    - duration_of_interval (int): Number of years in the interval (default=1).

    Returns:
    - category_transitions (dict): Interval -> float DataFrame (origin categories plus a
      trailing 'Uniform_Intensity' row x destination categories).
    """
    category_transitions = {}

    for year in significant_intervals:
        matrix = transition_matrices.get(year)
        if matrix is None:
            print(f"⚠️ Warning: No transition matrix found for {year}, skipping.")
            continue

        try:
            # Category labels: drop the two summary rows / two summary columns
            origin_categories = matrix.index[:-2]
            destination_categories = matrix.columns[:-2]

            # Total initial area over all categories
            initial_totals = matrix['Initial total'].iloc[:-2]
            total_area = initial_totals.sum()

            # Start from float zeros: the cells are filled with float percentages, and
            # writing floats into an integer-typed frame relies on a silent upcast
            transition_intensities = pd.DataFrame(0.0,
                                                  index=origin_categories,
                                                  columns=destination_categories.append(pd.Index(['Uniform_Intensity'])))

            for category_from in origin_categories:
                # Here the row label doubles as the gaining category for the uniform value
                gross_gain = matrix.loc['Gross gain', category_from]
                # Initial area of every category other than the gaining one
                area_not_n = total_area - matrix.loc[category_from, 'Initial total']

                if area_not_n > 0:  # Avoid division by zero
                    uniform_value = (gross_gain / duration_of_interval) / area_not_n * 100
                    transition_intensities.loc[category_from, 'Uniform_Intensity'] = uniform_value

                initial_total_category_from = matrix.loc[category_from, 'Initial total']
                if not initial_total_category_from > 0:
                    continue  # Nothing to normalise by; leave this row's cells at zero

                for category_to in destination_categories:
                    if category_from != category_to:  # Persistence is not a transition
                        transition_area = matrix.loc[category_from, category_to]
                        transition_intensity = (transition_area / duration_of_interval) / initial_total_category_from * 100
                        transition_intensities.at[category_from, category_to] = transition_intensity

            # Move the uniform intensities from a column to a row: the Series is indexed by
            # category, so assigning it as a row aligns each value with its category column
            uniform_intensity_row = transition_intensities['Uniform_Intensity'].copy()
            transition_intensities.drop('Uniform_Intensity', axis=1, inplace=True)
            transition_intensities.loc['Uniform_Intensity'] = uniform_intensity_row

            # Store transition intensities matrix in dictionary
            category_transitions[year] = transition_intensities

        except KeyError as e:
            print(f"⚠️ Warning: Missing data in transition matrix for {year}: {e}")
            continue  # Skip problematic years

    return category_transitions

In [None]:
# Compute the gain-side transition intensities (to each category from every
# other category) for the significant intervals.
# NOTE: this rebinds `category_transitions`, replacing the loss-side result
# assigned to the same name by the earlier calculation cell
category_transitions = calculate_category_transition_intensities_v2(significant_intervals, transition_matrices)

#### 3.5.4. "Transition Intensity" plots for Rtin, Wtn -> Gain, transition to target category from all other categories.

In [None]:
def plot_transition_heatmap_v2(significant_intervals, category_transitions, selection, sub_area):
    """
    Draw one red heatmap of transition intensities per significant interval.

    Values are rounded to whole numbers for annotation, the main diagonal is
    masked (shown white), and a black horizontal line is drawn at every row edge.
    Intervals missing from ``category_transitions`` are reported and skipped.

    Parameters:
    - significant_intervals (list): Intervals to plot.
    - category_transitions (dict): Interval -> DataFrame of transition intensities.
    - selection (dict): Provides the 'Park' name used in the title.
    - sub_area (str): Sub-area name used in the title.

    Returns:
    - None. Shows one figure per plotted interval.
    """
    for interval in significant_intervals:
        if interval not in category_transitions:
            print(f"⚠️ Warning: No data found for interval {interval}, skipping.")
            continue

        # Whole-number values keep the cell annotations compact
        rounded = category_transitions[interval].round(0)
        n_rows, n_cols = rounded.shape

        # True on the main diagonal only: those cells are hidden
        diagonal_mask = np.eye(n_rows, n_cols, dtype=bool)

        # Work on a private copy of the colormap so masked cells can be drawn white
        palette = mpl.colormaps.get_cmap('Reds').copy()
        palette.set_bad("white")

        plt.figure(figsize=(12, 9))
        sns.heatmap(rounded, mask=diagonal_mask, cmap=palette, annot=True, fmt=".0f", linewidths=0.5)

        # Black separators at every row boundary
        for row_edge in range(n_rows + 1):
            plt.axhline(y=row_edge, color='black', linestyle='-', linewidth=1)

        plt.title(f"LULC Transition Intensity Analysis for {selection['Park']}'s {sub_area} Buffer Zone ({interval})", fontsize=14)
        plt.xlabel('To Category (percent of category)', fontsize=12)
        plt.xticks(rotation=45, ha="right")
        plt.ylabel('From Category & Uniform Intensity (percent of category)', fontsize=12)

        plt.show()

In [None]:
# Draw the gain-side heatmaps (transition to the target category); expects
# `category_transitions` to hold the result of the gain-side calculation
plot_transition_heatmap_v2(significant_intervals, category_transitions, selection, sub_area_dropdown.value)

# 4. Plot the change hotspots for two years

### 4.1. Choose year interval to investigate

In [None]:
# Warn when there is nothing to choose from: either the name was never
# assigned in this session or the list is empty
if 'significant_intervals' not in globals() or not significant_intervals:
    print("⚠️ Warning: No significant intervals found. Ensure data is loaded before running this cell.")
    significant_intervals = []  # Guarantees a list for the concatenation below

# Dropdown choices: the significant intervals followed by a placeholder entry
year_options = significant_intervals + ['Choose here...']

# Dropdown for picking the interval to investigate
pre_post_year_dropdown = widgets.Dropdown(
    options=year_options,
    value='Choose here...',  # Start on the placeholder so a real choice must be made explicitly
    description='Year Interval:',
    disabled=False
)

# Callback invoked when the dropdown value changes
def on_pre_post_year_change(change):
    """
    Reports the newly selected year interval when the dropdown value changes.

    The callback only prints the new value; it does not store it anywhere.

    Parameters:
    - change (dict): Change notification from the widget; its 'new' entry is the new value.
    """
    print(f"Pre- and Post-year of investigation updated to: {change['new']}")

# Call the callback on changes of the dropdown's 'value' only
pre_post_year_dropdown.observe(on_pre_post_year_change, names='value')

# Show the dropdown in the notebook output
display(pre_post_year_dropdown)

# Usage hint for the multi-select widget shown below
print(
    "💡 TIP: To effectively use the multi-select tool below, hold Ctrl (Windows/Linux)/Cmd (Mac) while selecting multiple options."
)

# The multi-select lists the values of transition_label_map, so it must exist
if 'transition_label_map' not in globals():
    raise ValueError("Error: 'transition_label_map' is not defined. Please ensure it is loaded before running this cell.")

# Multi-select for choosing the LULCC transitions to show
select_widget = widgets.SelectMultiple(
    options=list(transition_label_map.values()),  # The map's values are used as the visible labels
    value=[],  # Nothing selected initially
    description='Active Changes:',
    disabled=False,
    layout=Layout(width='350px', height='300px')  # Fixed widget size
)

# Show the multi-select widget in the notebook output
display(select_widget)

### 4.2. Set variables based on selections above

In [None]:
# Interval currently chosen in the dropdown (may still be the placeholder)
selected_pre_post_year = pre_post_year_dropdown.value

# Display form of the interval, e.g. '2016_to_2017' -> '2016-2017'
if not selected_pre_post_year or selected_pre_post_year == "Choose here...":
    inv_year_range = "Not Selected"
else:
    inv_year_range = str(selected_pre_post_year).replace('_to_', '-')

# The label -> key lookup below needs transition_label_map
if 'transition_label_map' not in globals():
    raise ValueError("Error: 'transition_label_map' is not defined. Please ensure it is loaded before running this cell.")

# Labels highlighted in the multi-select widget (a tuple of label strings)
selected_labels = select_widget.value

# Translate the chosen labels back into the keys of transition_label_map
selected_keys = [
    code
    for code, label in transition_label_map.items()
    if label in selected_labels
]

# Keys are converted to integers; an empty selection yields an empty list
try:
    transitions_of_interest = [int(code) for code in selected_keys]
except ValueError as e:
    print(f"⚠️ Warning: Unable to convert selected keys to integers: {e}")
    transitions_of_interest = []

# Echo the resulting selection
print(f"📅 Selected Year Range: {inv_year_range}")
print(f"🔄 Selected Transitions (Keys): {transitions_of_interest}")

### 4.3. Show the spatial extent of these changes on the map

In [None]:
def generate_filtered_lulcc_map(results_per_area_and_year_pairs, sub_area_dropdown, pre_post_year_dropdown, transitions_of_interest):
    """
    Show the chosen LULCC transitions for one sub-area and year interval on a map.

    Parameters:
    - results_per_area_and_year_pairs (dict): Nested mapping indexed first by sub-area
      and then by year range. Each leaf is presumably an ee.Image of transition codes
      (it is used through `.eq()` and `.updateMask()`) -- confirm against the producer.
    - sub_area_dropdown (ipywidgets.Dropdown): Widget whose `.value` names the sub-area.
    - pre_post_year_dropdown (ipywidgets.Dropdown): Widget whose `.value` names the year
      range; the placeholder "Choose here..." is treated as no selection.
    - transitions_of_interest (list): Transition values that should remain visible.

    Returns:
    - None. With a valid selection a geemap.Map is displayed; otherwise an error
      message is printed and nothing is drawn.
    """
    # Current widget selections
    area_key = sub_area_dropdown.value
    interval_key = pre_post_year_dropdown.value

    # Work out whether the selection can be served; the first failing check wins
    if interval_key == "Choose here...":
        problem = "⚠️ Error: No year range selected. Please choose a valid year range."
    elif area_key not in results_per_area_and_year_pairs:
        problem = f"⚠️ Error: No data found for sub-area '{area_key}'. Please select a valid area."
    elif interval_key not in results_per_area_and_year_pairs[area_key]:
        problem = f"⚠️ Error: No data found for year range '{interval_key}' in sub-area '{area_key}'."
    else:
        problem = None

    if problem is not None:
        print(problem)
        return

    has_transitions = bool(transitions_of_interest)
    if not has_transitions:
        # Not fatal: the layer is still added, just with every pixel masked out
        print("⚠️ Warning: No transitions selected. The map may be empty.")

    # Image holding the transitions for the chosen sub-area and interval
    combined = results_per_area_and_year_pairs[area_key][interval_key]

    # Accumulate an OR over "pixel equals transition", seeded with an all-false image
    keep = ee.Image(0)
    for code in transitions_of_interest:
        keep = keep.Or(combined.eq(code))

    # Hide every pixel that is not one of the requested transitions
    filtered_transitions = combined.updateMask(keep)

    # Visualization settings; the fallbacks keep max() from failing on an empty list
    if has_transitions:
        upper_bound = max(transitions_of_interest)
        colours = ['red'] * len(transitions_of_interest)
    else:
        upper_bound = 1
        colours = ['red']
    vis_params = {'min': 0, 'max': upper_bound, 'palette': colours}

    # Build the map on a satellite basemap, add the layer and show it
    Map = geemap.Map(basemap='Esri.WorldImagery')
    Map.addLayer(filtered_transitions, vis_params, 'Filtered Transitions')
    display(Map)

In [None]:
# Render the selected LULC transitions for the chosen sub-area and year interval
generate_filtered_lulcc_map(
    results_per_area_and_year_pairs=results_per_area_and_year_pairs,
    sub_area_dropdown=sub_area_dropdown,
    pre_post_year_dropdown=pre_post_year_dropdown,
    transitions_of_interest=transitions_of_interest,
)

In [None]:
#################END OF CODE#################