In [None]:
# Load library imports
import sys
import torch
import random
import logging
import numpy as np
import seaborn as sns
import geopandas as gpd
from scipy.stats import skew
import matplotlib.pyplot as plt
from shapely.geometry import Point


# Load project Imports
from src.utils.config_loader import load_project_config
from src.graph_building.graph_construction import build_mesh, \
    define_catchment_polygon
from src.visualisation.mapped_visualisations import plot_interactive_mesh, \
    plot_interactive_mesh_colour_coded, plot_directional_mesh, \
        plot_geology_layers_interactive
from src.data_ingestion.static_data_ingestion import load_land_cover_data, \
    load_process_elevation_data, derive_slope_data, get_geo_feats, \
        load_and_process_geology_layers
from src.graph_building.data_merging import reorder_static_columns

In [None]:
# Set up logger config: concise "LEVEL - message" records streamed to stdout so
# they render in the notebook output. Add %(asctime)s / %(name)s to the format
# string if more verbose records are needed.
# NOTE(review): logging.basicConfig() does nothing when the root logger already
# has handlers (e.g. on a cell re-run); consider force=True on Python >= 3.8.
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)]
)

# Set up logger for file and load config file for paths and params
logger = logging.getLogger(__name__)
config = load_project_config(config_path="config/project_config.yaml")
notebook = True

# Global pipeline settings shared by the seeding and catchment selection below
pipeline_settings = config["global"]["pipeline_settings"]

# Seed every random number generator in use (stdlib, NumPy, PyTorch CPU + CUDA)
random_seed = pipeline_settings["random_seed"]
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)
# Ask cuDNN for deterministic kernels and disable its auto-tuner
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Define notebook demo catchment: the first configured catchment is used
catchments_to_process = pipeline_settings["catchments_to_process"]
if not catchments_to_process:
    # Fail with an actionable message instead of a bare IndexError below
    raise ValueError(
        "config['global']['pipeline_settings']['catchments_to_process'] is empty; "
        "at least one catchment is required for the notebook demo."
    )
catchment = catchments_to_process[0]
run_defra_API_calls = pipeline_settings["run_defra_api"]

logger.info(f"Show Notebook Outputs: {notebook}")
logger.info(f"Notebook Demo Catchment: {catchment.capitalize()}")

**Note:** CAMELS-GB is a rich data resource at the correct resolution, but its time series data ends in 2015, so it is insufficient for this model. Some of the static features in the dataset are still applicable:

- Land cover (e.g., % Urban, % Arable, etc.)
- Elevation and slope (from DEMs in the CAMELS dataset)
- Soil type / BFIHOST
- Hydrogeological classifications (e.g., aquifer classes)
- Geographic coordinates (for snapping)

### Build mesh ###

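To align all ingested data with a common spatial grid, a mesh of nodes (cell centroids) and cell polygons is built over the catchment at the configured grid resolution.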

In [None]:
# Pick out this catchment's polygon from the country-wide catchment GeoDataFrame
catchment_cfg = config[catchment]
catchment_paths = catchment_cfg['paths']

define_catchment_polygon(
    england_catchment_gdf_path=catchment_paths['gis_catchment_boundary'],
    target_mncat=catchment_cfg['target_mncat'],
    catchment=catchment,
    polygon_output_path=catchment_paths['gis_catchment_dir']
)

In [None]:
# Build the mesh for the demo catchment.
# NB: mesh_nodes_gdf holds the centroid coords; mesh_cells_gdf_polygons holds
# the cell polygons (used e.g. when averaging over a cell's area).
catchment_cfg = config[catchment]
catchment_paths = catchment_cfg['paths']
mesh_grid_resolution = catchment_cfg['preprocessing']['graph_construction']['grid_resolution']

mesh_nodes_table, mesh_nodes_gdf, mesh_cells_gdf_polygons, catchment_polygon = build_mesh(
    shape_filepath=catchment_paths['gis_catchment_dir'],
    output_path=catchment_paths['mesh_nodes_output'],
    catchment=catchment,
    grid_resolution=mesh_grid_resolution
)

### Land Cover Data Ingestion and Preprocessing ###

In [None]:
# Land cover source: UKCEH LCM2023
catchment_paths = config[catchment]['paths']

agg_land_cover_df = load_land_cover_data(
    tif_path=catchment_paths['raw_land_cover_path'],
    csv_path=catchment_paths['land_cover_csv_path'],
    catchment=catchment,
    shape_filepath=catchment_paths['gis_catchment_dir']
)

# Show the loaded land cover table
display(agg_land_cover_df)

**UKCEH Aggregate Class (AC) -> Mapped Classes**  
    1. Broadleaf woodland -> 1. Woodland  
    2. Coniferous woodland -> 1. Woodland  
    3. Arable -> 2. Arable  
    4. Improved grassland -> 3. Improved grassland  
    5. Semi-natural grassland -> 4. Semi-natural grassland  
    6. Mountain, heath and bog -> 5. Mountain, heath and bog  
    7. Saltwater -> 6. Water & Coastal  
    8. Freshwater -> 6. Water & Coastal  
    9. Coastal -> 6. Water & Coastal  
    10. Built-up areas and gardens -> 7. Urban & Suburban  

In [None]:
logger.info("Preparing land cover data for visual check...")

# Convert the land cover table to a GeoDataFrame: build point geometries from the
# 'lon'/'lat' columns and tag them as EPSG:4326
geometry_lc_for_plot = gpd.points_from_xy(agg_land_cover_df['lon'], agg_land_cover_df['lat'])
land_cover_gdf_for_plot = gpd.GeoDataFrame(
    agg_land_cover_df.copy(),
    geometry=geometry_lc_for_plot,
    crs="EPSG:4326"
)

# Legend text for each mapped land cover code
land_cover_labels = {1: "Woodland", 2: "Arable", 3: "Improved grassland",
                     4: "Semi-Natural Grassland", 5: "Mountain, heath and bog",
                     6: "Water & Coastal", 7: "Urban and Suburban"}

# Display colour (hex) for each mapped land cover code
land_cover_colors = {1: "#665142", 2: "#a2a182", 3: "#9CBD8B", 4: "#647D57",
                     5: "#b7b1aa", 6: "#4d85ba", 7: "#8e412e"}

# Shared map styling plus the per-catchment output locations
map_cfg = config['global']['visualisations']['maps']
catchment_map_cfg = config[catchment]['visualisations']['maps']

# Plot the land cover points, colour-coded by class, over the catchment polygon
# to check the spatial geometry
# NOTE(review): '_land_cover' is appended to the configured output paths as-is;
# confirm those config entries carry no file extension.
land_cover_map = plot_interactive_mesh_colour_coded(
    mesh_nodes_gdf=land_cover_gdf_for_plot,
    catchment_polygon=catchment_polygon,
    map_blue=map_cfg['map_blue'],
    esri=map_cfg['esri'],
    esri_attr=map_cfg['esri_attr'],
    static_output_path=f"{catchment_map_cfg['static_mesh_map_output']}_land_cover",
    interactive_output_path=f"{catchment_map_cfg['interactive_mesh_map_output']}_land_cover",
    category_colors=land_cover_colors,
    category_labels=land_cover_labels,
    grid_resolution=config[catchment]['preprocessing']['graph_construction']['grid_resolution'],
    interactive=map_cfg['display_interactive_map']
)

# Display map in notebook
land_cover_map

Merge the land cover classes into the mesh nodes.

In [None]:
# Attach each node's land cover code by matching on the easting/northing pair.
# Left join: every mesh centroid is kept, with NaN where no land cover matched.
land_cover_join_keys = ['easting', 'northing']
land_cover_columns = land_cover_join_keys + ['land_cover_code']

merged_gdf_nodes_landuse = mesh_nodes_gdf.merge(
    agg_land_cover_df[land_cover_columns],
    how='left',
    on=land_cover_join_keys
)

In [None]:
# Inspect the mesh nodes with the land cover code attached
display(merged_gdf_nodes_landuse)

In [None]:
# Count nodes per land cover code. dropna=False keeps a NaN row so that nodes
# left unmatched by the left join are visible rather than silently omitted.
merged_gdf_nodes_landuse['land_cover_code'].value_counts(dropna=False)

### Elevation Data Ingestion and Preprocessing ###

In [None]:
# Elevation source: DIGIMAPS (via OS Terrain 5 / Terrain 50)
catchment_cfg = config[catchment]
catchment_paths = catchment_cfg['paths']
preprocessing_cfg = catchment_cfg['preprocessing']

# NOTE(review): csv_path is fed the 'elevation_tif_path' config entry - confirm
# this is the intended key for that parameter.
elevation_gdf_polygon, clipped_dtm = load_process_elevation_data(
    dir_path=catchment_paths['elevation_dir_path'],
    csv_path=catchment_paths['elevation_tif_path'],
    catchment_gdf=catchment_polygon,
    mesh_cells_gdf_polygons=mesh_cells_gdf_polygons,
    catchment=catchment,
    elev_max=preprocessing_cfg['catchment_max_elevation'],
    elev_min=preprocessing_cfg['catchment_min_elevation'],
    output_geojson_dir=catchment_paths['output_polygon_dir'],
    elevation_geojson_path=catchment_paths['elevation_geojson_path'],
    grid_resolution=preprocessing_cfg['graph_construction']['grid_resolution']
)

In [None]:
# Inspect the elevation frame returned by load_process_elevation_data
elevation_gdf_polygon

In [None]:
# Distribution check for mean elevation.
# Read the column from the frame returned by load_process_elevation_data (the
# same frame that is later merged into the mesh) instead of relying on the
# input mesh polygons having been modified in place.
# Missing values are dropped first: skew() returns NaN if any value is NaN,
# whereas the histogram ignores them, so both now describe the same data.
elevation_values = elevation_gdf_polygon['mean_elevation'].dropna()
print(f"Skewness: {skew(elevation_values):.3f}")

fig, ax = plt.subplots()
sns.histplot(elevation_values, kde=True, ax=ax)
ax.set_title("Elevation Distribution")
ax.set_xlabel("Mean Elevation (mAOD)")
plt.show()


Moderate skew, but not concerning enough at this point to require a transformation, especially as the feature will be scaled. Will revisit after the first model iteration.

Merge the elevation data into the mesh.

In [None]:
# Add mean elevation and the cell polygon geometry to each node, keyed on node_id.
# Left join: every mesh centroid is kept, with NaN where no elevation matched.
elevation_columns = ['node_id', 'mean_elevation', 'polygon_geometry']

merged_gdf_nodes_elevation = merged_gdf_nodes_landuse.merge(
    elevation_gdf_polygon[elevation_columns],
    how='left',
    on='node_id'
)

In [None]:
# Inspect the mesh nodes with land cover and elevation attached
display(merged_gdf_nodes_elevation)

### Derive slope data from DEM ###

In [None]:
# Slope is derived from the higher resolution DEM (clipped_dtm) produced in the
# previous elevation step
catchment_paths = config[catchment]['paths']

slope_gdf, directional_edge_weights = derive_slope_data(
    high_res_raster=clipped_dtm,
    mesh_cells_gdf_polygons=mesh_cells_gdf_polygons,
    catchment=catchment,
    direction_output_path=catchment_paths['direction_edge_weights_path'],
    slope_output_path=catchment_paths['slope_path']
)

In [None]:
# Distribution check for mean slope (degrees).
# Missing values are dropped first: skew() returns NaN if any value is NaN,
# whereas the histogram ignores them, so both now describe the same data.
# The printed label states explicitly that the number is a skewness.
slope_degrees = slope_gdf['mean_slope_degrees'].dropna()
print(f"Slope (Degrees) Skewness: {skew(slope_degrees):.3f}")

fig, ax = plt.subplots()
sns.histplot(slope_degrees, kde=True, ax=ax)
ax.set_title("Slope Distribution")
ax.set_xlabel("Mean Slope (Degrees)")
plt.show()


In [None]:
# Distribution check for the sine component of mean slope aspect.
# Missing values are dropped first: skew() returns NaN if any value is NaN,
# whereas the histogram ignores them, so both now describe the same data.
# The printed label states explicitly that the number is a skewness.
aspect_sin = slope_gdf['mean_aspect_sin'].dropna()
print(f"Slope Aspect (Sine) Skewness: {skew(aspect_sin):.3f}")

fig, ax = plt.subplots()
sns.histplot(aspect_sin, kde=True, ax=ax)
ax.set_title("Slope Aspect Sine Distribution")
ax.set_xlabel("Mean Slope Aspect (Sine)")
plt.show()

In [None]:
# Distribution check for the cosine component of mean slope aspect.
# Missing values are dropped first: skew() returns NaN if any value is NaN,
# whereas the histogram ignores them, so both now describe the same data.
# The printed label states explicitly that the number is a skewness.
aspect_cos = slope_gdf['mean_aspect_cos'].dropna()
print(f"Slope Aspect (Cosine) Skewness: {skew(aspect_cos):.3f}")

fig, ax = plt.subplots()
sns.histplot(aspect_cos, kde=True, ax=ax)
ax.set_title("Slope Aspect Cosine Distribution")
ax.set_xlabel("Mean Slope Aspect (Cosine)")
plt.show()

In [None]:
# Inspect the slope frame returned by derive_slope_data
display(slope_gdf)

In [None]:
# Add the slope and aspect features to each node (left join on node_id keeps
# every centroid, with NaN where no slope matched), then put the static columns
# into the project's standard order.
slope_feature_columns = ['mean_slope_degrees', 'mean_aspect_sin', 'mean_aspect_cos']

merged_gdf_nodes_slope = reorder_static_columns(
    merged_gdf_nodes_elevation.merge(
        slope_gdf[['node_id'] + slope_feature_columns],
        how='left',
        on='node_id'
    )
)

In [None]:
# Inspect the mesh nodes with land cover, elevation and slope attached
display(merged_gdf_nodes_slope)

### Checking the directional weights are logical and plausible based on the map: ###

In [None]:
# Map the directional edge weights over the catchment polygon
map_cfg = config['global']['visualisations']['maps']
directional_map_output = config[catchment]['visualisations']['maps']['interactive_directional_map_output']

directional_map = plot_directional_mesh(
    directional_edge_weights_gdf=directional_edge_weights,
    catchment_polygon=catchment_polygon,
    output_path=directional_map_output,
    esri=map_cfg['esri'],
    esri_attr=map_cfg['esri_attr']
)

logger.info(f"Pipeline step 'Interactive Directional Mesh Mapping' complete for {catchment} catchment.")

# Inline display is currently disabled; end the cell with `directional_map`
# to show the map in the notebook.

In [None]:
# Inspect the x/y slope components (mean_slope_dx, mean_slope_dy) alongside the
# easting/northing coordinates
display(directional_edge_weights[['mean_slope_dx', 'mean_slope_dy', 'easting', 'northing']])

### Soil Type Data Ingestion ###

In [None]:
# TODO: ingest soil type data (source: CEH's Grid-to-Grid soil maps / HOST soil classes)

### Aquifer Type Data Ingestion ###

In [None]:
# TODO: ingest aquifer type data (source: DEFRA / British Geological Survey (BGS))


### Aquifer Transmissivity Data Ingestion ###

In [None]:
# TODO: ingest aquifer transmissivity data (source: DEFRA / British Geological Survey (BGS))

### Geological Maps Data Ingestion ###

In [None]:
# Geology source: DIGIMAPS (BGS data via Geology Digimap)

# Attribute columns to pull from each geology layer
geology_columns_of_interest = {
    "bedrock": ["RCS_ORIGIN"],
    "superficial": ["RCS_D"],
}

# Load the geology layers and process them against the mesh cells, using the
# mesh's CRS
mesh_geology_df = load_and_process_geology_layers(
    base_dir=config[catchment]["paths"]["geology_dir"],
    mesh_crs=mesh_cells_gdf_polygons.crs,
    columns_of_interest=geology_columns_of_interest,
    mesh_cells_gdf_polygons=mesh_cells_gdf_polygons,
    catchment=catchment
)

# Explore the result
mesh_geology_df

In [None]:
# Colour and label lookups for the geology categories and map layers
feature_category_colors, feature_category_labels, layer_labels = get_geo_feats()

# Shared map styling
map_cfg = config['global']['visualisations']['maps']

# NOTE(review): output_path is the 'interactive_mesh_map_output' entry with no
# suffix (the land cover map appends '_land_cover' to the same entry) - confirm
# this does not overwrite another map output.
geology_map = plot_geology_layers_interactive(
    mesh_geology_df=mesh_geology_df,
    catchment_polygon=catchment_polygon,
    esri=map_cfg['esri'],
    esri_attr=map_cfg['esri_attr'],
    output_path=config[catchment]["visualisations"]["maps"]["interactive_mesh_map_output"],
    feature_columns=['geo_superficial_type', 'geo_bedrock_type'],
    category_colors=feature_category_colors,
    category_labels=feature_category_labels,
    map_blue=map_cfg['map_blue'],
    layer_labels=layer_labels
)

# Display map in notebook
geology_map

### Distance from River (Derived) ###

In [None]:
# Derived from DEFRA / DIGIMAP