Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PySMO's confidence interval function to new surrogate interface #1005

Merged
merged 15 commits into from Nov 14, 2022
Expand Up @@ -102,10 +102,14 @@ The result of the ``pysmo_surrogate.PysmoPolyTrainer`` method is a python object

Confidence intervals for *PysmoPolyTrainer* models
--------------------------------------------------------------------
**Coming soon**

The confidence intervals for the regression parameters may be viewed using the method ``confint_regression``.
PySMO provides the user with the capability to compute confidence intervals for the regression parameters using the ``get_confidence_intervals`` method. This can be done by passing the result of the model training and the confidence interval value (*default=0.95*) into the ``pysmo_surrogate.PysmoPolyTrainer`` object:

.. code-block:: python

>>> conf_int = pr_trainer.get_confidence_intervals(poly_train, confidence=0.99)
OOAmusat marked this conversation as resolved.
Show resolved Hide resolved

The resulting object ``conf_int`` is a dictionary containing upper and lower confidence bounds as well as the estimated standard errors for all of the regression parameters of the trained models in ``poly_train``. The dictionary keys correspond to the output variable list supplied in ``output_labels``.

Surrogate Visualization
------------------------
Expand Down
27 changes: 27 additions & 0 deletions idaes/core/surrogate/pysmo_surrogate.py
Expand Up @@ -328,6 +328,33 @@ def _create_model(self, pysmo_input, output_label):
def _get_metrics(self, model):
return {"RMSE": model.errors["MSE"] ** 0.5, "R2": model.errors["R2"]}

def get_confidence_intervals(
    self, model: "PysmoTrainedSurrogate", confidence: float = 0.95
) -> Dict:
    """
    Compute confidence intervals for the regression parameters.

    Args:
        model : A PysmoTrainedSurrogate object
        confidence : Required confidence interval level, default = 0.95 (95%)

    Returns:
        dict(<dict>) : Dictionary object containing confidence intervals for all regressed parameters.

            The dictionary keys are the output variables originally supplied during model training.

            The dictionary values are dataframes containing four columns:

                - Regression coeff. : The regression coefficients for the trained model
                - Std. error : The standard error on the estimated coefficient
                - Conf. int. lower : Lower confidence bounds for the estimated regression parameters
                - Conf. int. upper : Upper confidence bounds for the estimated regression parameters
    """
    # Delegate to each per-output trained model's confint_regression method;
    # one dictionary entry per output variable.
    confint_dict = {}
    for output_label in model.output_labels:
        confint_dict[output_label] = model._data[
            output_label
        ].model.confint_regression(confidence)
    return confint_dict


class PysmoRBFTrainer(PysmoTrainer):
"""
Expand Down
73 changes: 73 additions & 0 deletions idaes/core/surrogate/tests/test_pysmo_surrogate.py
Expand Up @@ -1633,6 +1633,79 @@ def test_populate_block_multisurrogate_poly_userdef(self, pysmo_surr4):
"outputs[z2] - (-12.523574144487087 - 2.1308935361219556*inputs[x1] + 4.1308935361216435*inputs[x2] + 3.6347869158959156e-12*(inputs[x1]/inputs[x2]))"
)

@pytest.mark.parametrize(
    "confidence_dict", [{0.99: 3.2498355440153697}, {0.90: 1.8331129326536335}]
)
@pytest.mark.unit
def test_confint_default(self, confidence_dict):
    """Test that ``get_confidence_intervals`` returns the correct upper and
    lower confidence interval bounds.

    ``confidence_dict`` maps a confidence level to the corresponding
    t-distribution critical value (t-score) for the trained model, so the
    expected bounds can be reconstructed as coeff +/- t * std_error.
    """
    training_data = {
        "x1": [1, 2, 3, 4, 5],
        "x2": [5, 6, 7, 8, 9],
        "z1": [10, 20, 30, 40, 50],
        "z2": [6, 8, 10, 12, 14],
    }
    training_data = pd.DataFrame(training_data)
    validation_data = {
        "x1": [1, 2, 3, 4],
        "x2": [5, 6, 7, 8],
        "z1": [10, 20, 30, 40],
        "z2": [6, 8, 10, 12],
    }
    validation_data = pd.DataFrame(validation_data)
    input_labels = ["x1", "x2"]
    output_labels = ["z1", "z2"]
    bnds = {"x1": (0, 5), "x2": (0, 10)}

    pysmo_trainer = PysmoPolyTrainer(
        input_labels=input_labels,
        output_labels=output_labels,
        input_bounds=bnds,
        training_dataframe=training_data,
        validation_dataframe=validation_data,
        maximum_polynomial_order=1,
        multinomials=True,
        number_of_crossvalidations=3,
    )

    a2_poly = pysmo_trainer.train_surrogate()
    for k in confidence_dict.keys():
        confidence = k
        tval = confidence_dict[k]

        output = pysmo_trainer.get_confidence_intervals(a2_poly, confidence)

        # Reference regression coefficients for the trained polynomial models.
        reg_coeffs = {
            "z1": np.array(
                [
                    -75.26111111111476,
                    -8.815277777775934,
                    18.81527777777826,
                    -2.2556956302821618e-13,
                ]
            ),
            "z2": np.array(
                [
                    -3.0033074724377813,
                    0.2491731318906352,
                    1.7508268681094337,
                    6.786238238021269e-15,
                ]
            ),
        }

        # Test that output has the right number of dictionary entries
        assert len(output) == len(output_labels)
        for i in output_labels:
            # Test that the lower confidence bounds are correctly calculated.
            assert pytest.approx(output[i]["Conf. int. lower"].values, abs=1e-9) == (
                reg_coeffs[i] - tval * output[i]["Std. error"].values
            )
            # Test that the upper confidence bounds are correctly calculated.
            assert pytest.approx(output[i]["Conf. int. upper"].values, abs=1e-9) == (
                reg_coeffs[i] + tval * output[i]["Std. error"].values
            )

@pytest.mark.unit
def test_evaluate_unisurrogate_rbf(self, pysmo_surr5_rbf):
# Test ``evaluate_surrogate`` for RBF with one input/output
Expand Down