In [None]:
# Load library imports
import logging
import os
import random
import sys

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
from scipy.stats import skew
from shapely.geometry import Point


# Load project Imports
from src.utils.config_loader import load_project_config, deep_format, expanduser_tree
from src.graph_building.graph_construction import build_mesh, \
    define_catchment_polygon
from src.visualisation.mapped_visualisations import plot_interactive_mesh, \
    plot_interactive_mesh_colour_coded, plot_directional_mesh, \
        plot_geology_layers_interactive
from src.data_ingestion.static_data_ingestion import load_land_cover_data, \
    load_process_elevation_data, derive_slope_data, get_geo_feats, save_final_static_data, \
        load_and_process_geology_layers, ingest_and_process_productivity, \
            derive_distance_to_river, plot_nearest_river_for_node, load_process_soil_hydrology
from src.graph_building.data_merging import reorder_static_columns

In [None]:
# Root logging: INFO level, "LEVEL - message" records written to stdout.
# Verbose alternative format: '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
)

# Notebook-level logger, project config, and the flag reported when logging notebook output status
logger = logging.getLogger(__name__)
config = load_project_config(config_path="config/project_config.yaml")
notebook = True

# Root directories read from the raw config before it is reformatted
raw_data_root = config["global"]["paths"]["raw_data_root"]
results_root = config["global"]["paths"]["results_root"]

# Pass the roots to deep_format, then run expanduser_tree on the result.
# NOTE(review): presumably this fills path templates and expands "~" — confirm in config_loader.
config = expanduser_tree(
    deep_format(config, raw_data_root=raw_data_root, results_root=results_root)
)

In [None]:
# Seed every RNG used here (python, numpy, torch CPU + CUDA) from the configured value
pipeline_settings = config["global"]["pipeline_settings"]
random_seed = pipeline_settings["random_seed"]

random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)

# cuDNN: deterministic flag on, benchmark mode off
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# The notebook demonstrates the pipeline on the first configured catchment only
catchments_to_process = pipeline_settings["catchments_to_process"]
catchment = catchments_to_process[0]
run_defra_API_calls = pipeline_settings["run_defra_api"]

logger.info(f"Show Notebook Outputs: {notebook}")
logger.info(f"Notebook Demo Catchment: {catchment.capitalize()}")

Note: CAMELS-GB is a rich data resource at the correct resolution, but its time series data ends in 2015, so it is insufficient for this model. Some of the static features in the dataset are still applicable:

- Land cover (e.g., % Urban, % Arable, etc.)
- Elevation and slope (from DEMs in the CAMELS dataset)
- Soil type / BFIHOST
- Hydrogeological classifications (e.g., aquifer classes)
- Geographic coordinates (for snapping)

### Build mesh ###

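To align ingested data with a common spatial reference, a mesh of nodes (cell centroids) and cell polygons is built over the catchment at the configured grid resolution. The static features below are merged onto these mesh nodes.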

In [None]:
# Select Catchment area from country wide gdf
# The catchment is identified by its configured `target_mncat` value. The output location
# (`gis_catchment_dir`) is the same config entry that build_mesh receives as its shape input.
define_catchment_polygon(
    england_catchment_gdf_path=config[catchment]['paths']['gis_catchment_boundary'],
    target_mncat=config[catchment]['target_mncat'],
    catchment=catchment,
    polygon_output_path=config[catchment]['paths']['gis_catchment_dir']
)

In [None]:
# NB: mesh_nodes_gdf are the centroid coords, mesh_cells_gdf_polygons are polygons for e.g. averaging area
# Built from the catchment shape under `gis_catchment_dir` at the configured grid resolution.
# mesh_nodes_gdf, mesh_cells_gdf_polygons and catchment_polygon are reused by the ingestion,
# merge and mapping cells further down the notebook.
mesh_nodes_table, mesh_nodes_gdf, mesh_cells_gdf_polygons, catchment_polygon = build_mesh(
    shape_filepath=config[catchment]['paths']['gis_catchment_dir'],
    output_path=config[catchment]['paths']['mesh_nodes_output'],
    catchment=catchment,
    grid_resolution=config[catchment]['preprocessing']['graph_construction']['grid_resolution']
)

### Land Cover Data Ingestion and Preprocessing ###

In [None]:
# UKCEH LCM2023
# Later cells rely on the 'lon', 'lat', 'easting', 'northing' and 'land_cover_code'
# columns of the returned frame (map preparation and the merge into the mesh nodes).
agg_land_cover_df = load_land_cover_data(
    tif_path=config[catchment]['paths']['raw_land_cover_path'],
    csv_path=config[catchment]['paths']['land_cover_csv_path'],
    catchment=catchment,
    shape_filepath=config[catchment]['paths']['gis_catchment_dir']
)

# Show the aggregated land cover table for a visual check
display(agg_land_cover_df)

**UKCEH Aggregate Class (AC) -> Mapped Classes**  
    1. Broadleaf woodland -> 1. Woodland  
    2. Coniferous woodland -> 1. Woodland  
    3. Arable -> 2. Arable  
    4. Improved grassland -> 3. Improved grassland  
    5. Semi-natural grassland -> 4. Semi-natural grassland  
    6. Mountain, heath and bog -> 5. Mountain, heath and bog  
    7. Saltwater -> 6. Water & Coastal  
    8. Freshwater -> 6. Water & Coastal  
    9. Coastal -> 6. Water & Coastal  
    10. Built-up areas and gardens -> 7. Urban & Suburban  

In [None]:
logger.info("Preparing land cover data for visual check...")

# Build point geometries from the 'lon'/'lat' columns (declared as EPSG:4326) in a single
# vectorised call instead of constructing one shapely Point per row in Python.
geometry_lc_for_plot = gpd.points_from_xy(agg_land_cover_df['lon'], agg_land_cover_df['lat'])
land_cover_gdf_for_plot = gpd.GeoDataFrame(
    agg_land_cover_df.copy(), geometry=geometry_lc_for_plot, crs="EPSG:4326"
)

# Legend text and fill colour for each of the seven mapped land cover codes
land_cover_labels = {1: "Woodland", 2: "Arable", 3: "Improved grassland",
                     4: "Semi-Natural Grassland", 5: "Mountain, heath and bog",
                     6: "Water & Coastal", 7: "Urban and Suburban"}

land_cover_colors = {1: "#665142", 2: "#a2a182", 3: "#9CBD8B", 4: "#647D57",
                     5: "#b7b1aa", 6: "#4d85ba", 7: "#8e412e"}

# Plot the colour-coded land cover points over the catchment to check spatial geometry
# (notebook only). Output paths reuse the mesh map paths with a "_land_cover" suffix.
land_cover_map = plot_interactive_mesh_colour_coded(
    mesh_nodes_gdf=land_cover_gdf_for_plot,
    catchment_polygon=catchment_polygon,
    map_blue=config['global']['visualisations']['maps']['map_blue'],
    esri=config['global']['visualisations']['maps']['esri'],
    esri_attr=config['global']['visualisations']['maps']['esri_attr'],
    static_output_path=f"{config[catchment]['visualisations']['maps']['static_mesh_map_output']}_land_cover",
    interactive_output_path=f"{config[catchment]['visualisations']['maps']['interactive_mesh_map_output']}_land_cover",
    category_colors=land_cover_colors,
    category_labels=land_cover_labels,
    grid_resolution=config[catchment]['preprocessing']['graph_construction']['grid_resolution'],
    interactive=config['global']['visualisations']['maps']['display_interactive_map']
)

# Display map in notebook
land_cover_map

Merge into nodes mesh

In [None]:
# Attach the land cover class to each mesh node by matching grid coordinates.
# validate='many_to_one' makes pandas raise a MergeError if a coordinate pair occurs more
# than once in the land cover table, which would otherwise silently duplicate mesh nodes.
# NOTE(review): the join keys are coordinates, so both frames must hold exactly equal
# easting/northing values for a match — confirm they come from the same grid.
merged_gdf_nodes_landuse = mesh_nodes_gdf.merge(
    agg_land_cover_df[['easting', 'northing', 'land_cover_code']],
    on=['easting', 'northing'],
    how='left',  # left join to keep all centroids, even NaN
    validate='many_to_one'
)

In [None]:
display(merged_gdf_nodes_landuse)

In [None]:
merged_gdf_nodes_landuse['land_cover_code'].value_counts()

### Elevation Data Ingestion and Preprocessing ###

In [None]:
# DIGIMAPS (via OS Terrain 5 / Terrain 50)
# Returns the per-cell elevation frame (merged into the mesh on 'node_id' below) and
# clipped_dtm, which the slope derivation cell passes on as its high resolution raster.
# NOTE(review): `csv_path` is fed from the 'elevation_tif_path' config entry — confirm this
# is the intended path and not a mismatched key.
elevation_gdf_polygon, clipped_dtm = load_process_elevation_data(
    dir_path=config[catchment]['paths']['elevation_dir_path'],
    csv_path=config[catchment]['paths']['elevation_tif_path'],
    catchment_gdf=catchment_polygon,
    mesh_cells_gdf_polygons=mesh_cells_gdf_polygons,
    catchment=catchment,
    elev_max=config[catchment]['preprocessing']['catchment_max_elevation'],
    elev_min=config[catchment]['preprocessing']['catchment_min_elevation'],
    output_geojson_dir=config[catchment]['paths']['output_polygon_dir'],
    elevation_geojson_path=config[catchment]['paths']['elevation_geojson_path'],
    grid_resolution=config[catchment]['preprocessing']['graph_construction']['grid_resolution']
)

In [None]:
elevation_gdf_polygon

In [None]:
# Drop missing cells first: scipy's skew propagates NaN by default, so a single missing
# value would print "nan" even though the histogram below ignores missing values.
# NOTE(review): 'mean_elevation' is read from mesh_cells_gdf_polygons, so this relies on the
# elevation step above having populated that column on the mesh frame — confirm.
mean_elevation = mesh_cells_gdf_polygons['mean_elevation'].dropna()
print(f"Skewness: {skew(mean_elevation):.3f}")

# Explicit figure/axes so the plot does not depend on pyplot's current-axes state
fig, ax = plt.subplots()
sns.histplot(mean_elevation, kde=True, ax=ax)
ax.set_title("Elevation Distribution")
ax.set_xlabel("Mean Elevation (mAOD)")
plt.show()


Moderate skew, but not concerning enough at this point to require transformation, especially as it will be scaled. This will be revisited after the first model iteration.

Merge into mesh

In [None]:
# Attach mean elevation and the cell polygon to each mesh node via 'node_id'.
# validate='many_to_one' makes pandas raise a MergeError if a node_id occurs more than once
# in the elevation frame, which would otherwise silently duplicate mesh nodes.
merged_gdf_nodes_elevation = merged_gdf_nodes_landuse.merge(
    elevation_gdf_polygon[['node_id', 'mean_elevation', 'polygon_geometry']],
    on='node_id',
    how='left',  # left join to keep all centroids, even NaN
    validate='many_to_one'
)

In [None]:
display(merged_gdf_nodes_elevation)

### Geological Maps Data Ingestion ###

In [None]:
# DIGIMAPS (50K BGS data via Geology Digimap)

# Load and explore geology data
# One attribute column is requested per layer ("RCS_ORIGIN" for bedrock, "RCS_D" for
# superficial) and the mesh CRS is passed alongside the mesh polygons. The result's
# geo_*_type, *_flow_type and *_perm_avg columns are merged into the nodes on 'node_id' below.
mesh_geology_df = load_and_process_geology_layers(
    base_dir=config[catchment]["paths"]["geology_dir"],
    mesh_crs=mesh_cells_gdf_polygons.crs,
    columns_of_interest={"bedrock": ["RCS_ORIGIN"], "superficial": ["RCS_D"]},
    mesh_cells_gdf_polygons=mesh_cells_gdf_polygons,
    perm_dir=config[catchment]["paths"]["permeability_dir"],
    geo_output_dir=config[catchment]["paths"]["geology_df"],
    catchment=catchment
)

# Show the processed geology table
mesh_geology_df

In [None]:
# Colour, label and layer-name lookups for the geology map come from get_geo_feats()
feature_category_colors, feature_category_labels, layer_labels = get_geo_feats()

# Interactive map over the superficial and bedrock type columns.
# NOTE(review): output_path is the bare 'interactive_mesh_map_output' entry, whereas the
# land cover map appends a "_land_cover" suffix to the same entry — confirm the plotting
# function adds its own suffix, otherwise this may overwrite the base mesh map.
geology_map = plot_geology_layers_interactive(
    mesh_geology_df=mesh_geology_df,
    catchment_polygon=catchment_polygon,
    esri=config['global']['visualisations']['maps']['esri'],
    esri_attr=config['global']['visualisations']['maps']['esri_attr'],
    output_path=config[catchment]["visualisations"]["maps"]["interactive_mesh_map_output"],
    feature_columns=['geo_superficial_type','geo_bedrock_type'],
    category_colors=feature_category_colors,
    category_labels=feature_category_labels,
    map_blue=config['global']['visualisations']['maps']['map_blue'],
    layer_labels=layer_labels
)

# Display map in notebook
geology_map

### Merge Geology Data In

In [None]:
# Attach the geology type, flow type and average permeability columns to each mesh node.
# validate='many_to_one' makes pandas raise a MergeError if a node_id occurs more than once
# in the geology frame, which would otherwise silently duplicate mesh nodes.
merged_gdf_nodes_geology = merged_gdf_nodes_elevation.merge(
    mesh_geology_df[['geo_bedrock_type', 'geo_superficial_type', 'bedrock_flow_type',
                        'bedrock_perm_avg', 'superficial_flow_type', 'superficial_perm_avg',
                        'node_id']],
    on='node_id',
    how='left',  # left join to keep all centroids, even if NaN
    validate='many_to_one'
)

logger.info("Geology data snapped to mesh nodes (centroids).\n")

### Derive slope data from DEM ###

In [None]:
# Derived from higher resolution DEM data in previous step
# (clipped_dtm is the raster returned by the elevation cell). slope_gdf supplies the
# 'mean_slope_degrees', 'mean_aspect_sin' and 'mean_aspect_cos' columns plotted below;
# directional_edge_weights is mapped in the final section of the notebook.
slope_gdf, directional_edge_weights = derive_slope_data(
    high_res_raster=clipped_dtm,
    mesh_cells_gdf_polygons=mesh_cells_gdf_polygons,
    catchment=catchment,
    direction_output_path=config[catchment]['paths']['direction_edge_weights_path'],
    slope_output_path=config[catchment]['paths']['slope_path']
)

In [None]:
# Drop missing cells first: scipy's skew propagates NaN by default, so a single missing
# value would print "nan" even though the histogram below ignores missing values.
slope_degrees = slope_gdf['mean_slope_degrees'].dropna()
print(f"Slope (Degrees): {skew(slope_degrees):.3f}")  # printed value is the skewness

# Explicit figure/axes so the plot does not depend on pyplot's current-axes state
fig, ax = plt.subplots()
sns.histplot(slope_degrees, kde=True, ax=ax)
ax.set_title("Slope Distribution")
ax.set_xlabel("Mean Slope (Degrees)")
plt.show()


In [None]:
# Drop missing cells first: scipy's skew propagates NaN by default, so a single missing
# value would print "nan" even though the histogram below ignores missing values.
aspect_sin = slope_gdf['mean_aspect_sin'].dropna()
print(f"Slope Aspect (Sine): {skew(aspect_sin):.3f}")  # printed value is the skewness

# Explicit figure/axes so the plot does not depend on pyplot's current-axes state
fig, ax = plt.subplots()
sns.histplot(aspect_sin, kde=True, ax=ax)
ax.set_title("Slope Aspect Sine Distribution")
ax.set_xlabel("Mean Slope Aspect (Sine)")
plt.show()

In [None]:
# Drop missing cells first: scipy's skew propagates NaN by default, so a single missing
# value would print "nan" even though the histogram below ignores missing values.
aspect_cos = slope_gdf['mean_aspect_cos'].dropna()
print(f"Slope Aspect (Cosine): {skew(aspect_cos):.3f}")  # printed value is the skewness

# Explicit figure/axes so the plot does not depend on pyplot's current-axes state
fig, ax = plt.subplots()
sns.histplot(aspect_cos, kde=True, ax=ax)
ax.set_title("Slope Aspect Cosine Distribution")
ax.set_xlabel("Mean Slope Aspect (Cosine)")
plt.show()

In [None]:
display(slope_gdf)

### Soil Type Data Ingestion ###

In [None]:
# CEH's Grid-to-Grid soil maps / HOST soil classes

### Aquifer Productivity Data Ingestion ###

In [None]:
# Aquifer Productivity [BGS 625k Hydrogeological Data]

# Processed against the mesh cell polygons for the demo catchment.
# NOTE(review): the commented-out merge further down expects 'node_id' and
# 'aquifer_productivity' columns on this frame — confirm before re-enabling it.
productivity_gdf = ingest_and_process_productivity(
    productivity_dir=config[catchment]["paths"]["productivity_dir"],
    csv_path=config[catchment]['paths']['productivity_csv_path'],
    mesh_cells_gdf_polygons=mesh_cells_gdf_polygons,
    catchment=catchment
)

logger.info(f"Pipeline step 'Load and preprocess aquifer productvity data' complete for {catchment} catchment.\n")

### Depth to Groundwater Data [BGS] ###

In [None]:
# import rasterio
# import matplotlib.pyplot as plt

# # Path to the folder with .adf files (Raster format)
# raster_path = "data/01_raw/eden/static/groundwater_levels/gw_level/"

# # Open the raster (it should detect the .adf files automatically)
# with rasterio.open(raster_path) as src:
#     print("CRS:", src.crs)
#     print("Bounds:", src.bounds)
#     print("Resolution:", src.res)
#     print("Data Type:", src.dtypes)
#     gwl_raster = src.read(1)  # Read first band
#     gwl_profile = src.profile

# print(gwl_raster.shape)
# print(gwl_profile)

# # Quick visualisation
# plt.imshow(gwl_raster, cmap='viridis')
# plt.colorbar(label="Groundwater level (?)")
# plt.title("Groundwater Level Raster")
# plt.show()


In [None]:
# import rasterio
# from shapely.geometry import box

# with rasterio.open(raster_path) as src:
#     bounds = src.bounds
#     crs = src.crs

# # Print Raster Bounds
# print(bounds)

In [None]:
# import geopandas as gpd
# from shapely.geometry import box
# import numpy as np

# def create_grid(bounds, grid_size=1000):
#     xmin, ymin, xmax, ymax = bounds
#     rows = int(np.ceil((ymax - ymin) / grid_size))
#     cols = int(np.ceil((xmax - xmin) / grid_size))
    
#     polygons = []
#     node_ids = []
#     for i in range(rows):
#         for j in range(cols):
#             x0 = xmin + j * grid_size
#             y0 = ymin + i * grid_size
#             x1 = x0 + grid_size
#             y1 = y0 + grid_size
#             polygons.append(box(x0, y0, x1, y1))
#             node_ids.append(i * cols + j)

#     grid = gpd.GeoDataFrame({'node_id': node_ids, 'geometry': polygons}, crs=crs)
#     return grid

# # Use raster bounds to generate grid
# grid_gdf = create_grid(bounds)


In [None]:
# from rasterstats import zonal_stats

# stats = zonal_stats(grid_gdf, raster_path, stats=['mean'], geojson_out=True)
# grid_with_gwl = gpd.GeoDataFrame.from_features(stats)

# print(grid_with_gwl[['node_id', 'mean']].dropna().head())


### Distance from River (Derived) ###

In [None]:
# --- Plot Bounding Box to Check ---
# Visual sanity check of the watercourse links inside the catchment's bounding box before
# distances are derived from them. os, geopandas and matplotlib are imported once in the
# notebook's import cell rather than re-imported here.

rivers_dir = config[catchment]["paths"]["rivers_dir"]
watercourse_path = os.path.join(rivers_dir, 'WatercourseLink.shp')

# Only read features that intersect the catchment bounds.
# NOTE(review): the bbox tuple is assumed to be in the same CRS as the shapefile — confirm.
minx, miny, maxx, maxy = catchment_polygon.total_bounds
gdf = gpd.read_file(watercourse_path, bbox=(minx, miny, maxx, maxy))

# Keep only links whose 'fictitious' attribute is the string 'false'
gdf = gdf[gdf['fictitious'] == 'false']

# Plot the filtered links with labelled axes
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
gdf.plot(ax=ax)
ax.set_title("Map of Shapefile")
ax.set_xlabel("Easting (m)")
ax.set_ylabel("Northing (m)")

# Display the map
plt.show()

In [None]:
# Distance from River (Derived) [OS Open Rivers]

# Returns the per-node distance frame and rivers_real, which the single node test plot
# below passes to plot_nearest_river_for_node.
dist_to_river_gdf, rivers_real = derive_distance_to_river(
    rivers_dir=config[catchment]["paths"]["rivers_dir"],
    csv_path=config[catchment]['paths']['rivers_csv_path'],
    catchment=catchment,
    mesh_cells_gdf_polygons=mesh_cells_gdf_polygons,
    catchment_polygon=catchment_polygon,
    mesh_nodes_gdf=mesh_nodes_gdf
)

logger.info(f"Pipeline step 'Derive distance from river' complete for {catchment} catchment.\n")

In [None]:
dist_to_river_gdf

In [None]:
# --- SINGLE NODE TEST PLOT! ---
# Spot check of one arbitrary node (row position 1350). The mesh size depends on the
# configured catchment and grid resolution, so clamp the position to the last row instead
# of assuming the mesh always has at least 1351 nodes.
test_node_position = min(1350, len(mesh_nodes_gdf) - 1)

# Double brackets keep the selection as a one-row GeoDataFrame rather than a Series
node_to_plot = mesh_nodes_gdf.iloc[[test_node_position]]
plot_nearest_river_for_node(node_to_plot, rivers_real)

### Superficial Thickness Data Ingestion ###
  
Insufficient Coverage. Skipping.  

In [None]:
# import os
# import rasterio
# import pandas as pd

# def load_and_preprocess_sup_thickness(sup_thickness_dir: str, csv_path: str, catchment: str,
#                                       mesh_cells_gdf_polygons: gpd.GeoDataFrame, 
#                                       catchment_polygon: gpd.GeoDataFrame,
#                                       mesh_nodes_gdf: gpd.GeoDataFrame):
#     """
#     Ingest and preprocess superficial thickness data for each centroid in a catchment mesh.
#     """
#     logger.info(f"Loading superficial thickness data for {catchment} catchment...\n")
    
#     # Read in data
#     file_path = os.path.join(sup_thickness_dir, "w001001.adf")
#     with rasterio.open(file_path) as src:
        
#         # Read in first band of main data
#         thickness_data = src.read(1)
        
#         # Read in metadata
#         profile = src.profile
#         crs = src.crs
#         transform = src.transform
    
#         logger.info(f"Raster shape: {thickness_data.shape}")
#         logger.info(f"CRS: {crs}")
#         logger.info(f"Transform:\n{transform}\n")
        
#         # Reproject nodes to crs
#         logger.info(f"Reprojecting mesh nodes from {mesh_nodes_gdf.crs} to match raster CRS {crs}...")
#         mesh_nodes_reprojected = mesh_nodes_gdf.to_crs(crs)
#         logger.info(f"Mesh nodes reprojected to {mesh_nodes_reprojected.crs}\n")
        
#         # Get the coordinates of each reprojected node
#         coords = [(x, y) for x, y in zip(mesh_nodes_reprojected.geometry.x, mesh_nodes_reprojected.geometry.y)]
        
#         # Sample the raster at each coordinate
#         thickness_values = [x for x in src.sample(coords)]

#         # The result from src.sample is a list of arrays, flatten it
#         thickness_values = [val[0] for val in thickness_values]
    
#     # --- NEW LOGIC: Create a new DataFrame with the extracted values ---
#     sup_thickness_df = pd.DataFrame({
#         'node_id': mesh_nodes_gdf['node_id'],
#         'superficial_thickness': thickness_values
#     })
    
#     # Handle nodata values (e.g., -3.4028235e+38) by replacing them with NaN
#     nodata_value = -3.4028235e+38
#     sup_thickness_df['superficial_thickness'] = sup_thickness_df['superficial_thickness'].replace(nodata_value, np.nan)
    
#     logger.info(f"Extracted superficial thickness for {len(sup_thickness_df)} nodes.")
#     logger.info(f"Minimum thickness: {sup_thickness_df['superficial_thickness'].min():.2f} m")
#     logger.info(f"Maximum thickness: {sup_thickness_df['superficial_thickness'].max():.2f} m\n")

#     return sup_thickness_df

# # Superficial Thickness [BGS]

# superficial_thickness_df = load_and_preprocess_sup_thickness(
#     sup_thickness_dir=config[catchment]['paths']['sup_thickness_dir'],
#     csv_path=config[catchment]['paths']['sup_thickness_csv_path'],
#     catchment=catchment,
#     mesh_cells_gdf_polygons=mesh_cells_gdf_polygons,
#     catchment_polygon=catchment_polygon,
#     mesh_nodes_gdf=mesh_nodes_gdf
# )

# logger.info(f"Pipeline step 'Load Superficial Thickness Data' complete for {catchment} catchment.\n")

# superficial_thickness_df

# import geopandas as gpd
# import matplotlib.pyplot as plt
# import numpy as np
# from rasterio.plot import show

# def plot_superficial_thickness_catchment(sup_thickness_dir: str, rivers_gdf: gpd.GeoDataFrame,
#                                         mesh_nodes_gdf: gpd.GeoDataFrame, catchment: str):
#     """
#     Plots the superficial thickness raster data, zoomed to the catchment area.
#     """
#     # Calculate the combined bounding box of the catchment data
#     # This ensures both the river network and nodes are visible
#     minx_rivers, miny_rivers, maxx_rivers, maxy_rivers = rivers_gdf.total_bounds
#     minx_nodes, miny_nodes, maxx_nodes, maxy_nodes = mesh_nodes_gdf.total_bounds
    
#     # Get the overall bounds for the plot and add a small padding
#     minx = min(minx_rivers, minx_nodes)
#     miny = min(miny_rivers, miny_nodes)
#     maxx = max(maxx_rivers, maxx_nodes)
#     maxy = max(maxy_rivers, maxy_nodes)
    
#     padding_x = (maxx - minx) * 0.1 # 10% padding
#     padding_y = (maxy - miny) * 0.1 # 10% padding

#     # Open the raster file
#     # file_path = os.path.join(sup_thickness_dir, "w001001.adf")
#     file_path = "data/01_raw/eden/static/superficial_thickness/basic/bstm_v5/bstm_gb/w001001.adf"
#     with rasterio.open(file_path) as src:
        
#         # Read the first band of the raster data
#         thickness_data = src.read(1)
        
#         # Get metadata for plotting
#         crs = src.crs
#         transform = src.transform
#         nodata_value = src.nodata
        
#         # Make the nodata values transparent
#         masked_data = np.ma.masked_equal(thickness_data, nodata_value)

#         fig, ax = plt.subplots(figsize=(12, 12))
        
#         # Plot the raster using rasterio's show() function
#         show(
#             masked_data,
#             transform=transform,
#             ax=ax,
#             cmap='terrain',
#             title=f"Superficial Thickness for {catchment} Catchment"
#         )
        
#         # Plot the river network and mesh nodes on top
#         rivers_gdf_proj = rivers_gdf.to_crs(crs)
#         mesh_nodes_gdf_proj = mesh_nodes_gdf.to_crs(crs)
        
#         rivers_gdf_proj.plot(ax=ax, color='blue', linewidth=0.5, label='River Network')
#         mesh_nodes_gdf_proj.plot(ax=ax, color='yellow', markersize=5, label='Mesh Nodes')
        
#         # --- NEW: Set the plot bounds to match the catchment's bounds with padding ---
#         ax.set_xlim(minx - padding_x, maxx + padding_x)
#         ax.set_ylim(miny - padding_y, maxy + padding_y)

#         # Add labels, legend, and grid
#         ax.set_xlabel("Easting (m)")
#         ax.set_ylabel("Northing (m)")
#         ax.legend(loc='lower left')
#         ax.grid(True)
        
#         plt.show()

# # Run plotting
# plot_superficial_thickness_catchment(
#     sup_thickness_dir=config[catchment]['paths']['sup_thickness_dir'],
#     rivers_gdf=rivers_real,
#     mesh_nodes_gdf=mesh_nodes_gdf,
#     catchment=catchment
# )

### Soil Hydrology Data Ingestion ###

In [None]:
# Call soil hydrology load and process

# Processed for the demo catchment against the mesh cells, catchment polygon and mesh nodes.
# NOTE(review): the commented-out merge further down expects 'node_id' and
# 'HOST_soil_class' columns on this frame — confirm before re-enabling it.
soil_hydrology_df = load_process_soil_hydrology(
    soil_dir=config[catchment]["paths"]["soil_dir"],
    csv_path=config[catchment]['paths']['soil_csv_path'],
    catchment=catchment,
    mesh_cells_gdf_polygons=mesh_cells_gdf_polygons,
    catchment_polygon=catchment_polygon,
    mesh_nodes_gdf=mesh_nodes_gdf
)

logger.info(f"Pipeline step 'Load Soil Hydrology Data' complete for {catchment} catchment.\n")

Using NSRI Soilscapes instead, as it has clearer documentation and metadata for aggregation:

In [None]:
# Source: NSRI Soilscapes (https://mapapps2.bgs.ac.uk/ukso/home.html)

def _get_soilscape_mappings():
    """
    Return the lookup from detailed Soilscapes class names to aggregated drainage categories.

    The detailed Soilscapes names are collapsed into five broader, better balanced groups.
    See: https://nora.nerc.ac.uk/id/eprint/7369/1/IH_126.pdf for more details on classes.

    Returns:
        dict: Soilscapes class name (str) -> drainage category label (str).
    """
    # Category labels, spelled exactly as the mapping has always emitted them.
    # NOTE(review): 'freely_draining_soils' is snake_case while the other labels are
    # sentence case — confirm which convention downstream encoding expects.
    peat = 'Very wet/Peat soils'
    free_draining = 'freely_draining_soils'
    impeded = 'Impeded drainage soils'
    seasonally_wet = 'Seasonally wet soils'
    other = 'Other'

    return {
        'Blanket bog peat soils': peat,
        'Fen peat soils': peat,
        'Freely draining acid loamy soils over rock': free_draining,
        'Freely draining floodplain soils': free_draining,
        'Freely draining lime-rich loamy soils': free_draining,
        'Freely draining sandy Breckland soils': free_draining,
        'Freely draining slightly acid but base-rich soils': free_draining,
        'Freely draining slightly acid loamy soils': free_draining,
        'Freely draining slightly acid sandy soils': free_draining,
        'Freely draining very acid sandy and loamy soils': free_draining,
        'Lime-rich loamy and clayey soils with impeded drainage': impeded,
        'Loamy and clayey floodplain soils with naturally high groundwater': seasonally_wet,
        'Loamy and clayey soils of coastal flats with naturally high groundwater': seasonally_wet,
        'Loamy and sandy soils with naturally high groundwater and a peaty surface': peat,
        'Loamy soils with naturally high groundwater': seasonally_wet,
        'Naturally wet very acid sandy and loamy soils': seasonally_wet,
        'Raised bog peat soils': peat,
        'Restored soils mostly from quarry and opencast spoil': free_draining,
        'Saltmarsh soils': seasonally_wet,
        'Sand dune soils': free_draining,
        'Shallow lime-rich soils over chalk or limestone': free_draining,
        'Shallow very acid peaty soils over rock': peat,
        'Slightly acid loamy and clayey soils with impeded drainage': impeded,
        'Slowly permeable seasonally wet acid loamy and clayey soils': seasonally_wet,
        'Slowly permeable seasonally wet slightly acid but base-rich loamy and clayey soils': seasonally_wet,
        'Slowly permeable wet very acid upland soils with a peaty surface': peat,
        'Very acid loamy upland soils with a wet peaty surface': peat,
        'Unsurveyed/Urban': other,
        'Water body': other,
    }

# Apply mapping (not yet enabled)
# soil_data['drainage_category'] = soil_data['soilscape_name'].map(_get_soilscape_mappings())

### Aquifer Type Data Ingestion ###

In [None]:
# DEFRA / British Geological Survey (BGS)


### Aquifer Transmissivity Data Ingestion ###

In [None]:
# DEFRA / British Geological Survey (BGS)

### Merge Slope and Remaining Static Data In

In [None]:
# # Snap Slope to Mesh

# merged_gdf_nodes_slope = merged_gdf_nodes_geology.merge(
#     slope_gdf[['node_id', 'mean_slope_degrees', 'mean_aspect_sin', 'mean_aspect_cos']],
#     on='node_id',
#     how='left'  # left join to keep all centroids, even if NaN
# )

# logger.info(f"Slope degrees and sinusoidal aspect data snapped to mesh nodes (centroids).\n")

# # Snap Soil Hydrology to Mesh

# merged_gdf_nodes_soil = merged_gdf_nodes_slope.merge(
#     soil_hydrology_df[['node_id', 'HOST_soil_class']],
#     on='node_id',
#     how='left'  # left join to keep all centroids, even if NaN
# )

# logger.info(f"Soil Hydrology data snapped to mesh nodes (centroids).\n")

# # Snap Aquifer Productivity to Mesh

# merged_gdf_nodes_productivity = merged_gdf_nodes_soil.merge(
#     productivity_gdf[['node_id', 'aquifer_productivity']],
#     on='node_id',
#     how='left'  # left join to keep all centroids, even if NaN
# )

# logger.info(f"Aquifer Productivity data snapped to mesh nodes (centroids).\n")

# # Snap Distance from River to Mesh

# static_features = merged_gdf_nodes_productivity.merge(
#     dist_to_river_gdf[['node_id', 'distance_to_river']],
#     on='node_id',
#     how='left'  # left join to keep all centroids, even if NaN
# )

# logger.info(f"Distance from river data snapped to mesh nodes (centroids).\n")

In [None]:
# final_static_df = reorder_static_columns(static_features)  # TODO: Update as more features added
# save_final_static_data(
#     static_features=final_static_df,
#     dir_path=config[catchment]["paths"]["final_df_path"]
# )

# logger.info(f"Full static feature dataframe finalised and ready to merge into main model dataframe.\n")

In [None]:
# display(static_features)

### Checking the directional weights are logical and plausible based on the map: ###

In [None]:
# Map the directional edge weights returned by the slope derivation step over the
# catchment polygon, so their plausibility can be checked against the basemap.
directional_map = plot_directional_mesh(
    directional_edge_weights_gdf=directional_edge_weights,
    catchment_polygon=catchment_polygon,
    output_path=config[catchment]['visualisations']['maps']['interactive_directional_map_output'],
    esri=config['global']['visualisations']['maps']['esri'],
    esri_attr=config['global']['visualisations']['maps']['esri_attr']
)

logger.info(f"Pipeline step 'Interactive Directional Mesh Mapping' complete for {catchment} catchment.")

# Inline display is currently disabled; uncomment the last line to render the map here.
# # Show map in notebook
# directional_map

In [None]:
display(directional_edge_weights[['mean_slope_dx', 'mean_slope_dy', 'easting', 'northing']])