### Calculate the precipitation frequency estimates for all durations

In [1]:
import logging
from pathlib import Path

import pandas as pd
import yaml
from pydantic import ValidationError

from config.update import get_additional_config_parameters
from config.validate import ConfigValidate
from estimation.functions import (
    extract_daily_or_subdaily_weights,
    solve_points_one_duration_header,
)
from estimation.main import fit_regions_parallel
from preprocess.main import preprocess_input_data

#### Read the configuration file and set logging information

In [2]:
# Name of the YAML configuration file; it is opened relative to the current
# working directory.
config_file = "config.YAML"
try:
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the same file parses identically on every operating system.
    with open(config_file, "r", encoding="utf-8") as file:
        # safe_load only builds plain Python objects from the YAML document.
        config = yaml.safe_load(file)
except FileNotFoundError:
    # Print an actionable hint for the user, then re-raise so execution stops
    # here instead of failing later on an undefined `config`.
    print(
        "Configuration file not found. "
        "Place the YAML file in the same folder as this notebook."
    )
    raise

In [3]:
# Extract logging configuration
log_file = config["fit_log_file"]
log_level = config["log_level"].upper()

# Remove all existing handlers if they are already defined
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

# Set up logging
logging.basicConfig(
    filename=log_file,
    filemode="a",
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=getattr(logging, log_level, logging.INFO),
)

# Create a logger
logger = logging.getLogger(__name__)

#### Validate the configuration file
##### This step ensures that the configuration adheres to predefined schemas, catching type errors or missing fields early.

In [5]:
# Build the validated settings object from the raw configuration mapping.
# Any failure is written to the log together with its traceback and then
# re-raised, so the notebook stops at this cell.
try:
    config_settings = ConfigValidate(**config)
    logging.info("Configuration loaded successfully:")
except ValidationError:
    # logger.exception logs at ERROR level and attaches the active traceback.
    logger.exception("Validation error occurred while loading GeneralSettings")
    raise
except Exception as exc:
    logger.exception(f"An unexpected error occurred: {exc}")
    raise

#### Extract additional parameters based on project region

In [7]:
# Flag forwarded unchanged to fit_regions_parallel (as `ok_mc`) by the
# fitting loop.
ok_mc = False

# Durations to process; the fitting loop and the combine step both iterate
# over this list in order.
all_durations = [
    "60m",
    "06h",
    "24h",
    "04d",
    "10d",
    "60d",
]

# Let the project helper extend the loaded configuration.
# NOTE(review): the helper's exact additions are not visible here; the section
# heading says they depend on the project region.
config = get_additional_config_parameters(config, ok_save_log=True)

#### Calculate the precipitation frequency estimates for all the durations
##### Fit the distribution using multiple cores.

In [9]:
# Durations whose neighbor-weights file is labelled "subdaily"; every other
# duration is labelled "daily".
subdaily_durations = ("60m", "06h")

# Number of parallel workers handed to fit_regions_parallel.
n_jobs = 8  # -1: Use all available cores

# For each duration: refresh the configuration, preprocess the inputs, write
# the header of the per-duration output CSV, then fit all regions in parallel.
for duration in all_durations:
    dur_str = "subdaily" if duration in subdaily_durations else "daily"

    print(f"\nFitting for duration: {duration}")
    logging.info(f"Fitting for duration: {duration}")

    # Select the duration, then let the helper refresh the derived settings.
    config["ams_duration"] = duration
    config = get_additional_config_parameters(config, ok_save_log=True)

    is_regional_analysis = config["region"] != "conus"
    is_24hour_duration = duration == "24h"
    ## Generate plots only for 24-hr duration and sub-region of CONUS
    config["save_plots"] = is_regional_analysis and is_24hour_duration

    df_grid, df_meta, df_ams, ds_elev = preprocess_input_data(config)
    df_neighbor_weights = extract_daily_or_subdaily_weights(config, duration).set_index(
        "id_num"
    )

    # Per-duration output directory. Create it up front so that writing the
    # header below cannot fail on a fresh output tree.
    out_dir = Path(config["proposedOutputPath"], config["ams_duration"])
    out_dir.mkdir(parents=True, exist_ok=True)

    out_file = config["csvInputFileName"].replace(".csv", "")
    out_file = out_file + "_output_" + config["ams_duration"] + ".csv"
    out_fit_file = out_dir / out_file

    # Start the output file with its header; mode "w" discards any earlier
    # content of the same file.
    header = solve_points_one_duration_header(config)
    with open(out_fit_file, "w") as f:
        f.writelines(header)

    out_weight_file = out_dir / f"NeighborWeights_{dur_str}_{config['ams_duration']}.csv"

    fit_regions_parallel(
        df_grid=df_grid,
        df_meta=df_meta,
        df_ams=df_ams,
        df_neighbor_weights=df_neighbor_weights,
        config=config,
        out_fit_file=out_fit_file,
        out_weight_file=out_weight_file,
        ok_mc=ok_mc,
        n_jobs=n_jobs,
    )


Fitting for duration: 60m


Processing regions: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151/1151 [00:10<00:00]



Fitting for duration: 06h


Processing regions: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151/1151 [00:08<00:00]



Fitting for duration: 24h


Processing regions: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151/1151 [08:55<00:00]



Fitting for duration: 04d


Processing regions: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151/1151 [00:41<00:00]



Fitting for duration: 10d


Processing regions: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151/1151 [00:42<00:00]



Fitting for duration: 60d


Processing regions: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151/1151 [00:41<00:00]


#### Combine and save the estimates

In [10]:
# Read the per-duration output CSVs written by the fitting loop, tag every row
# with its duration, and save the concatenation as a single CSV.

# File-name stem shared by all durations (loop-invariant, so computed once).
file_stem = config["csvInputFileName"].replace(".csv", "")

dfs = []
for duration in all_durations:
    in_file = file_stem + "_output_" + duration + ".csv"
    in_fit_file = Path(config["proposedOutputPath"], duration, in_file)
    df = pd.read_csv(in_fit_file)
    # Record which duration each row belongs to before stacking the frames.
    df["dur"] = duration
    dfs.append(df)

# ignore_index=True gives the combined frame a fresh 0..n-1 row index.
df_combined = pd.concat(dfs, ignore_index=True)

out_path = Path(config["proposedOutputPath"], "combined")
# parents=True also creates missing intermediate directories.
out_path.mkdir(parents=True, exist_ok=True)
df_combined.to_csv(Path(out_path, "df_out_combined.csv"), index=False)

In [11]:
# Display the combined estimates; as the last expression of the cell, the
# notebook renders the DataFrame as the cell output.
df_combined

Unnamed: 0,k,Lat,Lon,prismMAP,gridMAM,Elev_m,ID0,HDSC0,nAMS0,MAM0,...,P_10y_in,P_25y_in,P_50y_in,P_100y_in,P_1000y_in,A14P_2y,A14P_25y,A14P_100y,elapsedTime(sec),dur
0,0,42.6250,-112.0881,17.39,0.4730,1502.0,0,10_9158,29,0.4730,...,0.747041,0.952548,1.126203,1.318780,2.129902,0.401,1.001000,1.300000,0.171626,60m
1,1,43.8417,-112.4183,8.29,0.3832,1460.0,3,89_0007,23,0.3832,...,0.608102,0.783612,0.934618,1.104678,1.846131,0.345,0.873000,1.118000,0.174949,60m
2,2,43.7435,-112.1211,9.22,0.4095,1456.0,4,89_0010,25,0.4095,...,0.650684,0.834704,0.991341,1.166131,1.912677,0.351,0.920000,1.180000,0.165928,60m
3,3,43.5941,-112.6517,7.73,0.4642,1565.0,5,89_0017,26,0.4642,...,0.738997,0.952144,1.134955,1.340281,2.230098,0.385,1.010000,1.305000,0.166616,60m
4,4,43.5897,-112.9399,7.50,0.3512,1497.0,6,89_0018,26,0.3512,...,0.559963,0.725141,0.868196,1.030211,1.745594,0.332,0.954000,1.262000,0.164552,60m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5503,1146,47.8558,-117.0367,46.00,16.6412,1283.0,1513,86_1081,17,16.6412,...,21.942846,24.400318,26.068052,27.603201,31.928437,15.919,22.620001,25.431000,0.281558,60d
5504,1147,43.3227,-115.9964,10.95,4.2767,972.0,1514,88_0674,30,4.2767,...,5.992284,6.862727,7.476180,8.059194,9.815531,4.233,6.723000,7.824000,0.294815,60d
5505,1148,45.8027,-111.5865,12.58,5.2382,1363.0,1515,88_0808,45,5.2382,...,7.178687,8.142283,8.815611,9.450919,11.335881,4.773,7.578000,8.929000,0.329006,60d
5506,1149,43.6167,-116.2000,12.95,5.1191,825.0,1516,98_0001,75,5.1191,...,7.154982,8.188369,8.916874,9.609403,11.696752,5.323,7.666000,8.773000,0.297619,60d
