Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PySMO's confidence interval function to new surrogate interface #1005

Merged
merged 15 commits into from Nov 14, 2022
Expand Up @@ -102,10 +102,14 @@ The result of the ``pysmo_surrogate.PysmoPolyTrainer`` method is a python object

Confidence intervals for *PysmoPolyTrainer* models
--------------------------------------------------------------------
**Coming soon**

The confidence intervals for the regression parameters may be viewed using the method ``confint_regression``.
PySMO provides the user with the capability to compute confidence intervals for the regression parameters using the ``get_confidence_intervals`` method. This can be done by passing the result of the model training and the confidence interval value (*default=0.95*) into the ``pysmo_surrogate.PysmoPolyTrainer`` object:

.. code-block:: python

>>> conf_int = pr_trainer.get_confidence_intervals(poly_train, confidence=0.99)
OOAmusat marked this conversation as resolved.
Show resolved Hide resolved

The resulting object ``conf_int`` is a dictionary containing upper and lower confidence bounds as well as the estimated standard errors for all of the regression parameters of the trained models in ``poly_train``. The dictionary keys correspond to the output variable list supplied in ``output_labels``.

Surrogate Visualization
------------------------
Expand Down
27 changes: 27 additions & 0 deletions idaes/core/surrogate/pysmo_surrogate.py
Expand Up @@ -328,6 +328,33 @@ def _create_model(self, pysmo_input, output_label):
def _get_metrics(self, model):
return {"RMSE": model.errors["MSE"] ** 0.5, "R2": model.errors["R2"]}

def get_confidence_intervals(
    self, model: "PysmoTrainedSurrogate", confidence: float = 0.95
) -> Dict:
    """
    Compute confidence intervals for the regression parameters.

    Args:
        model : A PysmoTrainedSurrogate object
        confidence : Required confidence interval level, default = 0.95 (95%)

    Returns:
        dict(<dict>) : Dictionary object containing confidence intervals for all regressed parameters.

            The dictionary keys are the output variables originally supplied during model training.

            The dictionary values are dataframes containing four columns:

                - Regression coeff. : The regression coefficients for the trained model
                - Std. error : The standard error on the estimated coefficient
                - Conf. int. lower : Lower confidence bounds for the estimated regression parameters
                - Conf. int. upper : Upper confidence bounds for the estimated regression parameters
    """
    # Delegate to each per-output trained model's confint_regression method;
    # one dictionary entry per output variable.
    confint_dict = {}
    for output_label in model.output_labels:
        confint_dict[output_label] = model._data[
            output_label
        ].model.confint_regression(confidence)
    return confint_dict


class PysmoRBFTrainer(PysmoTrainer):
"""
Expand Down
73 changes: 73 additions & 0 deletions idaes/core/surrogate/tests/test_pysmo_surrogate.py
Expand Up @@ -1633,6 +1633,79 @@ def test_populate_block_multisurrogate_poly_userdef(self, pysmo_surr4):
"outputs[z2] - (-12.523574144487087 - 2.1308935361219556*inputs[x1] + 4.1308935361216435*inputs[x2] + 3.6347869158959156e-12*(inputs[x1]/inputs[x2]))"
)

@pytest.mark.parametrize(
    "confidence_dict", [{0.99: 3.2498355440153697}, {0.90: 1.8331129326536335}]
)
@pytest.mark.unit
def test_confint_default(self, confidence_dict):
    """Test that ``get_confidence_intervals`` returns the correct upper and
    lower confidence interval bounds.

    ``confidence_dict`` maps a confidence level to the corresponding
    t-distribution critical value (t-score) for the trained model, so the
    expected bounds can be reconstructed as coeff +/- t * std_error.
    """
    training_data = {
        "x1": [1, 2, 3, 4, 5],
        "x2": [5, 6, 7, 8, 9],
        "z1": [10, 20, 30, 40, 50],
        "z2": [6, 8, 10, 12, 14],
    }
    training_data = pd.DataFrame(training_data)
    validation_data = {
        "x1": [1, 2, 3, 4],
        "x2": [5, 6, 7, 8],
        "z1": [10, 20, 30, 40],
        "z2": [6, 8, 10, 12],
    }
    validation_data = pd.DataFrame(validation_data)
    input_labels = ["x1", "x2"]
    output_labels = ["z1", "z2"]
    bnds = {"x1": (0, 5), "x2": (0, 10)}

    pysmo_trainer = PysmoPolyTrainer(
        input_labels=input_labels,
        output_labels=output_labels,
        input_bounds=bnds,
        training_dataframe=training_data,
        validation_dataframe=validation_data,
        maximum_polynomial_order=1,
        multinomials=True,
        number_of_crossvalidations=3,
    )

    a2_poly = pysmo_trainer.train_surrogate()
    for k in confidence_dict.keys():
        confidence = k
        tval = confidence_dict[k]

        output = pysmo_trainer.get_confidence_intervals(a2_poly, confidence)

        # Reference regression coefficients for the trained polynomial models.
        reg_coeffs = {
            "z1": np.array(
                [
                    -75.26111111111476,
                    -8.815277777775934,
                    18.81527777777826,
                    -2.2556956302821618e-13,
                ]
            ),
            "z2": np.array(
                [
                    -3.0033074724377813,
                    0.2491731318906352,
                    1.7508268681094337,
                    6.786238238021269e-15,
                ]
            ),
        }

        # Test that output has the right number of dictionary entries
        assert len(output) == len(output_labels)
        for i in output_labels:
            # Test that the lower confidence bounds are correctly calculated.
            assert pytest.approx(output[i]["Conf. int. lower"].values, abs=1e-9) == (
                reg_coeffs[i] - tval * output[i]["Std. error"].values
            )
            # Test that the upper confidence bounds are correctly calculated.
            assert pytest.approx(output[i]["Conf. int. upper"].values, abs=1e-9) == (
                reg_coeffs[i] + tval * output[i]["Std. error"].values
            )

@pytest.mark.unit
def test_evaluate_unisurrogate_rbf(self, pysmo_surr5_rbf):
# Test ``evaluate_surrogate`` for RBF with one input/output
Expand Down