In [1]:
# Load library imports
import os
import sys
import torch
import random
import logging
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from pyproj import Transformer
from shapely.geometry import box
from datetime import datetime, timedelta

# Load project Imports
from src.utils.config_loader import load_project_config
from src.graph_building.graph_construction import build_mesh, \
    define_catchment_polygon
from src.data_ingestion.spatial_transformations import find_catchment_boundary
from src.data_ingestion.timeseries_data_ingestion import find_haduk_file_names, \
    load_era5_land_data

In [2]:
# Route INFO-and-above log records to the notebook's stdout using a compact format.
# Verbose alternative, kept for reference:
#    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s'
stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s - %(message)s',
    handlers=[stdout_handler],
)

# Logger for this notebook, plus the project config that supplies paths and parameters
logger = logging.getLogger(__name__)
config = load_project_config(config_path="config/project_config.yaml")
notebook = True
pipeline_settings = config["global"]["pipeline_settings"]

# Seed each random number generator in use from the single configured seed
random_seed = pipeline_settings["random_seed"]
for seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_rng(random_seed)

# cuDNN flags: request deterministic behaviour and switch off benchmark mode
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# The first configured catchment is the one used throughout this notebook
catchments_to_process = pipeline_settings["catchments_to_process"]
catchment = catchments_to_process[0]
run_defra_API_calls = pipeline_settings["run_defra_api"]  # True to run API calls

for log_line in (
    f"Show Notebook Outputs: {notebook}",
    f"Notebook Demo Catchment: {catchment.capitalize()}",
):
    logger.info(log_line)

INFO - Loading configuration from: /Users/charlotte/Desktop/Dissertation_Code/config/project_config.yaml
INFO - Show Notebook Outputs: True
INFO - Notebook Demo Catchment: Eden


### Build mesh ###

To align ingested data with a common spatial grid, first select the catchment polygon and then build a mesh of nodes over it.

In [None]:
# Pick the demo catchment out of the country-wide catchment GeoDataFrame,
# using the catchment's configured target_mncat value.
catchment_cfg = config[catchment]
catchment_paths = catchment_cfg['paths']

define_catchment_polygon(
    england_catchment_gdf_path=catchment_paths['gis_catchment_boundary'],
    target_mncat=catchment_cfg['target_mncat'],
    catchment=catchment,
    polygon_output_path=catchment_paths['gis_catchment_dir'],
)

In [None]:
# Build the mesh over the catchment shape at the configured grid resolution.
# NB: mesh_nodes_gdf holds the centroid coords; mesh_cells_gdf_polygons holds the
# cell polygons (for e.g. averaging over an area).
mesh_cfg = config[catchment]
mesh_paths = mesh_cfg['paths']
mesh_resolution = mesh_cfg['preprocessing']['graph_construction']['grid_resolution']

mesh_outputs = build_mesh(
    shape_filepath=mesh_paths['gis_catchment_dir'],
    output_path=mesh_paths['mesh_nodes_output'],
    catchment=catchment,
    grid_resolution=mesh_resolution,
)
mesh_nodes_table, mesh_nodes_gdf, mesh_cells_gdf_polygons, catchment_polygon = mesh_outputs

### Rainfall Ingestion and Preprocessing ###

In [None]:
# Collect the HadUK rainfall file names for the configured model period,
# resolved against the configured HadUK base URL.
global_cfg = config["global"]
ingestion_cfg = global_cfg["data_ingestion"]

haduk_urls = find_haduk_file_names(
    start_date=ingestion_cfg["model_start_date"],
    end_date=ingestion_cfg["model_end_date"],
    base_url=global_cfg["paths"]["HAD_UK_rainfall_url"],
)

Continue with the following once the CEDA status page (https://www.ceda.ac.uk/status/) reports the data services as available.

In [None]:
# Connectivity probe: open the first HadUK rainfall file and print its summary.
# The NETRC environment variable is pointed at a local credentials file
# (presumably picked up by the netCDF client for the CEDA login - confirm).
os.environ["NETRC"] = os.path.abspath("ceda_credentials.netrc")

# Fail with an explicit message instead of a bare IndexError when no files were found.
if len(haduk_urls) == 0:
    raise ValueError(
        "haduk_urls is empty - check model_start_date, model_end_date and "
        "HAD_UK_rainfall_url in the project config."
    )
url = haduk_urls[0]

try:
    # The context manager closes the remote file handle once the summary is printed,
    # so the probe does not leave a connection open in the kernel.
    with xr.open_dataset(url, engine="netcdf4") as haduk_sample_ds:
        print(haduk_sample_ds)
except OSError as e:
    print("Failed to open dataset. This may be due to CEDA login issues. Check https://www.ceda.ac.uk/status/")
    print(e)

### Temperature Ingestion and Preprocessing ###

Continue with this section once the CEDA status page (https://www.ceda.ac.uk/status/) reports the data services as available.

### Air Pressure Ingestion and Preprocessing ###

In [None]:
# Load ERA5-Land surface pressure ('sp') for the demo catchment over the
# configured model period, using 'mean' aggregation.
global_cfg = config["global"]
ingestion_cfg = global_cfg["data_ingestion"]
sp_paths = config[catchment]["paths"]

pressure_data = load_era5_land_data(
    catchment=catchment,
    shape_filepath=sp_paths['gis_catchment_dir'],
    required_crs=27700,  # presumably an EPSG code (British National Grid) - confirm
    cdsapi_path=global_cfg["paths"]["CDSAPI_path"],
    start_date=ingestion_cfg["model_start_date"],
    end_date=ingestion_cfg["model_end_date"],
    run_era5_land_api=global_cfg["pipeline_settings"]["run_era5_land_api"],
    raw_output_dir=sp_paths["sp_raw_output_dir"],
    processed_output_dir=sp_paths["sp_processed_output_dir"],
    csv_path=sp_paths["sp_csv_path"],
    fig_path=sp_paths["sp_fig_path"],
    era5_feat='sp',
    era5_long='surface_pressure',
    feat_name='surface_pressure',
    aggregation_type='mean',
)

### AET Ingestion and Preprocessing ###

In [None]:
# Load ERA5-Land total evaporation ('e') for the demo catchment over the
# configured model period, stored under the feature name 'aet' with 'sum' aggregation.
global_cfg = config["global"]
ingestion_cfg = global_cfg["data_ingestion"]
aet_paths = config[catchment]["paths"]

aet_data = load_era5_land_data(
    catchment=catchment,
    shape_filepath=aet_paths['gis_catchment_dir'],
    required_crs=27700,  # presumably an EPSG code (British National Grid) - confirm
    cdsapi_path=global_cfg["paths"]["CDSAPI_path"],
    start_date=ingestion_cfg["model_start_date"],
    end_date=ingestion_cfg["model_end_date"],
    run_era5_land_api=global_cfg["pipeline_settings"]["run_era5_land_api"],
    raw_output_dir=aet_paths["aet_raw_output_dir"],
    processed_output_dir=aet_paths["aet_processed_output_dir"],
    csv_path=aet_paths["aet_csv_path"],
    fig_path=aet_paths["aet_fig_path"],
    era5_feat='e',
    era5_long='total_evaporation',
    feat_name='aet',
    aggregation_type='sum',
)

### 2m Surface Temperature Ingestion and Preprocessing ###

In [None]:
# Load ERA5-Land 2m temperature ('2t') for the demo catchment over the
# configured model period, using 'mean' aggregation.
global_cfg = config["global"]
ingestion_cfg = global_cfg["data_ingestion"]
temp_paths = config[catchment]["paths"]

temp_data = load_era5_land_data(
    catchment=catchment,
    shape_filepath=temp_paths['gis_catchment_dir'],
    required_crs=27700,  # presumably an EPSG code (British National Grid) - confirm
    cdsapi_path=global_cfg["paths"]["CDSAPI_path"],
    start_date=ingestion_cfg["model_start_date"],
    end_date=ingestion_cfg["model_end_date"],
    run_era5_land_api=global_cfg["pipeline_settings"]["run_era5_land_api"],
    raw_output_dir=temp_paths["2t_raw_output_dir"],
    processed_output_dir=temp_paths["2t_processed_output_dir"],
    csv_path=temp_paths["2t_csv_path"],
    fig_path=temp_paths["2t_fig_path"],
    era5_feat='2t',
    era5_long='2m_temperature',
    feat_name='2m_temp',
    aggregation_type='mean',
)

In [None]:
# Spot-check one raw surface-pressure GRIB file by printing the min, max and mean of 'sp'.
# NOTE(review): the path is hard-coded to the "eden" catchment and one month (2017-01);
# consider deriving it from `catchment` / the project config.
grib_path = 'data/01_raw/eden/hydroclimatic/surface_pressure/surface_pressure_2017_01_era5land.grib'
ds = xr.open_dataset(grib_path, engine='cfgrib')

surface_pressure = ds['sp']
sp_summary = tuple(
    getattr(surface_pressure, stat_name)().item()
    for stat_name in ('min', 'max', 'mean')
)
print(*sp_summary)  # print(ds.variables) to see full metadata

In [None]:
# NOTE: everything in this cell is commented out, so running it does nothing.
# When uncommented, the helper below renames every file in the given directory whose
# name starts with "aet_" and contains "era5land" so that it starts with
# "total_evaporation_" instead, printing each rename and a final count.
# NOTE(review): presumably a one-off migration so raw file names match the
# era5_long='total_evaporation' value passed in the AET ingestion cell - confirm,
# and consider moving this helper out of the notebook.
# import os
# import re

# def rename_aet_files_to_total_evaporation(directory):
#     """
#     Renames GRIB and index files from 'aet_YYYY_MM_era5land.*' 
#     to 'total_evaporation_YYYY_MM_era5land.*' in the given directory.
#     """
#     renamed_files = 0

#     for filename in os.listdir(directory):
#         if filename.startswith("aet_") and "era5land" in filename:
#             new_filename = re.sub(r'^aet_', 'total_evaporation_', filename)
#             src = os.path.join(directory, filename)
#             dst = os.path.join(directory, new_filename)
#             os.rename(src, dst)
#             renamed_files += 1
#             print(f"Renamed: {filename} -> {new_filename}")

#     print(f"\nDone. Renamed {renamed_files} files in '{directory}'.")

# # Example usage:
# rename_aet_files_to_total_evaporation("data/01_raw/eden/hydroclimatic/aet/")
