In [1]:
# Load library imports
import sys
import torch
import random
import logging
import numpy as np
import pandas as pd
import seaborn as sns
import geopandas as gpd
from scipy.stats import skew
import matplotlib.pyplot as plt
from shapely.geometry import box


# Load project Imports
from src.utils.config_loader import load_project_config
from src.graph_building.graph_construction import build_mesh, \
    define_catchment_polygon, build_main_df
from src.graph_building.data_merging import snap_stations_to_mesh
from src.visualisation.mapped_visualisations import plot_interactive_mesh_with_stations

In [2]:
# Configure root logging: INFO and above is streamed to the notebook's stdout.
# A more verbose alternative format is:
#   '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
)

# Notebook-level logger, plus the project config holding all paths and params
logger = logging.getLogger(__name__)
config = load_project_config(config_path="config/project_config.yaml")
notebook = True

# Settings shared by the whole pipeline (seed, catchment list, API toggle)
pipeline_settings = config["global"]["pipeline_settings"]

# Seed every random number source used here so global state is defined
random_seed = pipeline_settings["random_seed"]
for seed_rng in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_rng(random_seed)

# Ask cuDNN for deterministic kernels and switch off its benchmark autotuner
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# The notebook demonstrates the first configured catchment only
catchments_to_process = pipeline_settings["catchments_to_process"]
catchment = catchments_to_process[0]
run_defra_API_calls = pipeline_settings["run_defra_api"]

logger.info(f"Show Notebook Outputs: {notebook}")
logger.info(f"Notebook Demo Catchment: {catchment.capitalize()}")

INFO - Loading configuration from: /Users/charlotte/Desktop/Dissertation_Code/config/project_config.yaml
INFO - Show Notebook Outputs: True
INFO - Notebook Demo Catchment: Eden


In [3]:
# Config lookups shared by both pipeline steps in this cell
catchment_paths = config[catchment]['paths']
mesh_resolution = config[catchment]['preprocessing']['graph_construction']['grid_resolution']

# Step 1: pick the demo catchment's boundary out of the country-wide data
define_catchment_polygon(
    england_catchment_gdf_path=catchment_paths['gis_catchment_boundary'],
    target_mncat=config[catchment]['target_mncat'],
    catchment=catchment,
    polygon_output_path=catchment_paths['gis_catchment_dir'],
)

# Step 2: build the mesh from the boundary written by step 1
mesh_outputs = build_mesh(
    shape_filepath=catchment_paths['gis_catchment_dir'],
    output_path=catchment_paths['mesh_nodes_output'],
    catchment=catchment,
    grid_resolution=mesh_resolution,
)
mesh_nodes_table, mesh_nodes_gdf, mesh_cells_gdf_polygons, catchment_polygon = mesh_outputs

logger.info(f"Pipeline step 'Build Mesh' complete for {catchment} catchment.")

INFO - Eden and Esk boundary polygon(s) extracted from England data.
data/01_raw/eden/gis/eden_catchment_boundary/eden_combined_boundary.geojson
INFO - Combined Eden and Esk boundary saved to: data/01_raw/eden/gis/eden_catchment_boundary/eden_combined_boundary.geojson
INFO - BUILD_MESH: Starting mesh construction with input: data/01_raw/eden/gis/eden_catchment_boundary/eden_combined_boundary.geojson and resolution: 1000m

INFO - Finding eden catchment spatial boundaries...

INFO - Loading  boundary from: data/01_raw/eden/gis/eden_catchment_boundary/eden_combined_boundary.geojson
INFO - Single polygon found in the catchment boundary.
INFO - Catchment bounding box: min_x=325093.81960000005, min_y=495490.0, max_x=389010.0, max_y=590440.0

INFO - Aligned minx: 325000.0, miny: 495000.0, maxx: 390000.0, maxy: 591000.0
INFO - Number of x-coordinates (bottom-left): 66
INFO - Number of y-coordinates (bottom-left): 97

INFO - Generated 6402 grid cells (centroids and polygons) within bounding box

Load the centroid node CSVs

In [4]:
# NOTE(review): this whole cell is disabled. If re-enabled it would rebuild the
# mesh CSV path from the configured output prefix and grid resolution and read
# it into `mesh_nodes`. Presumably it is unnecessary because `build_mesh`
# already returns the mesh objects in memory earlier in the notebook - confirm,
# then either delete this cell or restore it.
# input_path=config[catchment]['paths']['mesh_nodes_output']
# grid_resolution=config[catchment]['preprocessing']['graph_construction']['grid_resolution']

# mesh_input_path = input_path + '_' + str(grid_resolution) + '.csv'
# mesh_nodes = pd.read_csv(mesh_input_path)

Merge the station list with the polygon geometry using a spatial join to snap stations to the mesh

In [5]:
# File locations for this catchment, as defined in the project config
catchment_paths = config[catchment]["paths"]

# Snap the catchment's stations to the mesh and keep the resulting mapping
station_node_mapping = snap_stations_to_mesh(
    station_list_path=catchment_paths["gwl_station_list_output"],
    polygon_geometry_path=catchment_paths["output_polygon_dir"],
    output_path=catchment_paths["snapped_station_node_mapping"],
    mesh_nodes_gdf=mesh_nodes_gdf,
    catchment=catchment,
)

INFO - Snapping eden catchment stations to mesh centroids...

INFO - Spatial merging catchment stations within polygon geometry.
INFO - Merging catchment stations to retain original polygon.
INFO - Merging catchment stations with centroid geometry.
INFO - Replacing original station geometry with nearest centroid geometry.
INFO - Converting spatial mapping DataFrame to GeoDataFrame.

INFO - Saving snapped station list to data/02_processed/eden/gwl_station_data/snapped_station_node_mapping.csv
INFO - All eden catchment stations snapped to centroids.



  exec(code_obj, self.user_global_ns, self.user_ns)


In [6]:
# Display the mapping returned by snap_stations_to_mesh for a visual check
station_node_mapping

Unnamed: 0,node_id,station_id,station_name,easting,northing,geometry
0,430,NY36_2,LONGTOWN,339500.0,567500.0,POINT (339500.000 567500.000)
1,902,NY46_3,Scaleby,346500.0,564500.0,POINT (346500.000 564500.000)
2,1420,NY55_71,Castle carrock,353500.0,553500.0,POINT (353500.000 553500.000)
3,1648,NY54_54,Croglin,356500.0,548500.0,POINT (356500.000 548500.000)
4,1335,NY54_10,Ainstable,352500.0,546500.0,POINT (352500.000 546500.000)
5,1254,NY54_56,Baronwood,351500.0,542500.0,POINT (351500.000 542500.000)
6,1858,NY54_55,Renwick,359500.0,541500.0,POINT (359500.000 541500.000)
7,1326,NY53_9,East Brownrigg,352500.0,537500.0,POINT (352500.000 537500.000)
8,1556,NY53_51,BGS EV2,355500.0,534500.0,POINT (355500.000 534500.000)
9,1983,NY63_2,Skirwith,361500.0,532500.0,POINT (361500.000 532500.000)


In [7]:
# Map styling is global; output locations are per catchment
map_settings = config['global']['visualisations']['maps']
map_outputs = config[catchment]['visualisations']['maps']
mesh_resolution = config[catchment]['preprocessing']['graph_construction']['grid_resolution']

# Draw the catchment mesh with the snapped stations overlaid
mesh_map = plot_interactive_mesh_with_stations(
    mesh_nodes_gdf=mesh_nodes_gdf,
    catchment_polygon=catchment_polygon,
    map_blue=map_settings['map_blue'],
    esri=map_settings['esri'],
    esri_attr=map_settings['esri_attr'],
    static_output_path=map_outputs['static_mesh_map_output'],
    interactive_output_path=map_outputs['interactive_station_map_output'],
    catchment=catchment,
    grid_resolution=mesh_resolution,
    interactive=map_settings['display_interactive_map'],
    stations_gdf=station_node_mapping,
)

logger.info(f"Pipeline step 'Interactive Mesh Mapping' complete for {catchment} catchment.")

# Leave the map as the cell's final expression so the notebook renders it
mesh_map

INFO - PLOT_INTERACTIVE_MESH: Plotting catchment mesh overlaid on map.
INFO - Interactive map file saved to: results/figures/eden/maps/interactive_station_mesh_map_1000.html

INFO - Pipeline step 'Interactive Mesh Mapping' complete for eden catchment.


Create a main DataFrame for merging all features into the model input

In [None]:
# Modelling period boundaries come from the global data-ingestion settings
ingestion_settings = config["global"]["data_ingestion"]

# Build the main model-input frame for the demo catchment's mesh nodes
main_df = build_main_df(
    start_date=ingestion_settings["model_start_date"],
    end_date=ingestion_settings["model_end_date"],
    mesh_nodes_gdf=mesh_nodes_gdf,
    catchment=catchment,
)

INFO - Building main model input dataframe for eden catchment...

INFO - Building timesteps from 2014-01-01 to 2024-12-31
INFO - Building node ID's from 0 to 2749
INFO - Converting multi index to data frame for feature merging

Total rows in main eden catchment DataFrame: 1e+07



Unnamed: 0,timestep,node_id
0,2014-01-01,0
1,2014-01-01,1
2,2014-01-01,2
3,2014-01-01,3
4,2014-01-01,4
...,...,...
11049495,2024-12-31,2745
11049496,2024-12-31,2746
11049497,2024-12-31,2747
11049498,2024-12-31,2748


Merge all time series data into the main DataFrame by station (from `{station}_trimmed.csv`)