# In [4]:
import yaml

# Workflow configuration: a description plus one nested section per top-level
# key, each section carrying its own "run" flag. Key order is deliberate --
# insertion order is what an order-preserving dump (sort_keys=False) emits.
data = {
    "description": "Calibration and bias prediction workflow for Torque 2026 Abstract",
    "preprocessing": {
        "run": True,
        "base_dir": "data/EDF_datasets",
        "case_names": "all",
    },
    "database_gen": {
        "run": True,
        "location": "data/EDF_datasets",
        "flow_model": "pywake",
        # Each dotted parameter path maps to a two-element list of floats.
        # NOTE(review): presumably [lower, upper] sampling bounds -- confirm.
        "param_config": {
            "attributes.analysis.wind_deficit_model.wake_expansion_coefficient.k_b": [
                0.01,
                0.07,
            ],
            "attributes.analysis.blockage_model.ss_alpha": [0.75, 1.0],
        },
        "n_samples": 100,
    },
    "error_prediction": {
        "run": True,
        "features": [
            "Blocking_Distance",
            "f_b",
            "HH_div_ABLh",
            "Blockage_Ratio",
            "turbulence_intensity",
        ],
        "ML_pipeline": "Default hardcoded",
        # "default" corresponds to k=0.04. Alternatives such as single-value
        # MSE calibration or Bayesian calibration could be offered here later,
        # each bringing further settings of its own.
        "param_calibration": "default",
        "cross_validation": {
            "run": True,
            "splitting_mode": "kfold_shuffled",
            "n_splits": 10,
            "metrics": ["rmse", "r2", "mae"],
            # Optional analysis for comparing the resulting bias under bias
            # correction versus calibration.
            "misc_analysis": {
                "run": True,
                "bias_correct": True,
                # Extra quantities returned for every cross-validation fold so
                # the bias after bias correction or calibration can be computed.
                "quantities": ["ref_bias", "pw_bias"],
            },
        },
    },
}

# Destination path of the generated workflow configuration.
output_file = "err_prediction_example.yaml"

# Write `data` as block-style YAML (no inline flow collections), keeping the
# keys in the order they were inserted instead of sorting them.
with open(output_file, mode="w") as file:
    yaml.dump(
        data,
        stream=file,
        default_flow_style=False,
        sort_keys=False,
    )