### Calculate the weights for daily and sub-daily durations

In [1]:
import logging
from pathlib import Path

import pandas as pd
import yaml
from pydantic import ValidationError

from config.update import get_additional_config_parameters
from config.validate import ConfigValidate
from preprocess.main import preprocess_input_data
from weights.main import combine_and_save_weights, compute_weights_grid_parallel

#### Read the configuration file and set logging information

In [2]:
# Relative path: the YAML file is looked up in the current working directory.
config_file = "config.YAML"

try:
    with open(config_file) as stream:
        config = yaml.safe_load(stream)
except FileNotFoundError:
    # Give the user an actionable hint, then let the original error propagate.
    missing_config_hint = (
        "Configuration file not found. "
        "Place the YAML file in the same folder as this notebook."
    )
    print(missing_config_hint)
    raise

In [3]:
# Extract logging configuration
log_file_path = config["weights_log_file"]
log_level = config["log_level"].upper()

# Detach and close every handler already attached to the root logger.
# basicConfig() is a no-op while the root logger still has handlers, so they
# must be removed first; closing them as well releases the log file handle
# opened by a previous run of this cell instead of leaking it.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
    handler.close()

# Set up logging: append to the configured log file. An unrecognised
# log level name falls back to INFO.
logging.basicConfig(
    filename=log_file_path,
    filemode="a",
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=getattr(logging, log_level, logging.INFO),
)

# Create a logger
logger = logging.getLogger(__name__)

#### Validate configuration file.
##### This step ensures that the configuration adheres to predefined schemas, catching type errors or missing fields early.

In [5]:
# Validate the loaded configuration by constructing ConfigValidate from it.
# Any failure is written to the log with its traceback and then re-raised so
# the notebook stops here.
try:
    config_settings = ConfigValidate(**config)
except ValidationError:
    # logger.exception logs at ERROR level and attaches the traceback.
    logger.exception("Validation error occurred while loading GeneralSettings")
    raise
except Exception as exc:
    logger.exception("An unexpected error occurred: %s", exc)
    raise
else:
    # Use the notebook's logger (not the root logger) so the record carries
    # the same logger name as the error records above.
    logger.info("Configuration loaded successfully:")

#### Extract additional parameters based on project region

In [7]:
# Rebind `config` to the value returned by get_additional_config_parameters,
# called with the loaded config and ok_save_log=True.
# NOTE(review): input and output share one name, so re-running this cell
# passes the previously returned config back in — confirm the helper
# tolerates being applied twice.
config = get_additional_config_parameters(config, ok_save_log=True)

#### Calculate weights for daily and sub-daily durations
##### Weight calculations for multiple stations are distributed among cores.

In [None]:
# Value forwarded as `n_jobs` to compute_weights_grid_parallel in the
# weight-computation cell below.
n_jobs = 8  # -1: use all cores

In [9]:
# Each duration group consists of two durations: weights are computed for each
# one separately and the two resulting files are then combined.
# A mapping (instead of an if/elif chain) guarantees that every group has its
# own pair, so no iteration can silently reuse the previous group's values.
duration_groups = {
    "daily": ("24h", "10d"),
    "subdaily": ("60m", "06h"),
}

for duration, (sdur1, sdur2) in duration_groups.items():
    for sdur in (sdur1, sdur2):
        print(f"\nComputing weights for the duration: {sdur}")
        logger.info("Computing weights for the duration: %s", sdur)

        # The current duration is handed to the helpers below through the
        # shared config dict, which is mutated in place.
        config["amsDuration"] = sdur
        output_weights_dir = Path(config["proposedOutputPath"], sdur)

        df_grid, df_meta, df_ams, ds_elev = preprocess_input_data(config)
        elev_srtm = ds_elev["elevation"].load()

        # NOTE(review): final_output is passed without a file extension while
        # the combine step below reads "NeighborWeights_<duration>.csv" —
        # presumably the helper appends ".csv"; confirm.
        compute_weights_grid_parallel(
            df_grid=df_grid,
            df_meta=df_meta,
            df_ams=df_ams,
            config=config,
            output_dir=output_weights_dir,
            final_output=Path(output_weights_dir, f"NeighborWeights_{sdur}"),
            elev_srtm=elev_srtm,
            n_jobs=n_jobs,  # -1: use all cores
            batch_size=50,  # Adjust based on in-memory size
            cleanup_intermediate_files=True,
        )

    print(f"\nCombining weights for {sdur1} and {sdur2} duration")

    # Read back the per-duration weight files of this group.
    file1 = Path(config["proposedOutputPath"], sdur1, f"NeighborWeights_{sdur1}.csv")
    file2 = Path(config["proposedOutputPath"], sdur2, f"NeighborWeights_{sdur2}.csv")

    df1 = pd.read_csv(file1)
    df2 = pd.read_csv(file2)

    # NOTE: despite its name, `df_daily` receives the return value of
    # combine_and_save_weights for whichever group is being processed
    # (daily or subdaily); the name is kept for backward compatibility.
    df_daily = combine_and_save_weights(df1, df2, duration, sdur1, sdur2, config)


Computing weights for the duration: 24h
Processed 1151 out of 1151 points (100.0%) - Elapsed time (hh:mm:ss): 00:23:59
Computing weights for the duration: 10d
Processed 1151 out of 1151 points (100.0%) - Elapsed time (hh:mm:ss): 00:23:59
Combining weights for 24h and 10d duration

Computing weights for the duration: 60m
Processed 1151 out of 1151 points (100.0%) - Elapsed time (hh:mm:ss): 00:24:03
Computing weights for the duration: 06h
Processed 1151 out of 1151 points (100.0%) - Elapsed time (hh:mm:ss): 00:23:57
Combining weights for 60m and 06h duration
