Crude Optimization of Hyperparameters (#9)
* Added fbf_utilities.py with commonly used functions to avoid import loops

* Increased the number of PolyChord verification runs to 1000
* Moved responsibility for verification plotting to train_evidence_networks.py
* Refined verification plot presentation
* Added additional verification statistics

* Added rolling back to validation loss minimum
* Crude optimization of the number of epochs and batch size
* Crude optimization of decay length, initial learning rate, and number of data sets to train on 

* Increased the default noise to be in line with the REACH pessimistic scenario
* globalemu signals are now computed in batches of 100_000 to avoid memory errors (see the sketch below)
* Style changes to match the APS column width

* Version bump
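A minimal sketch of the batching idea from the globalemu bullet above, assuming an emulator that can be called on an array of parameter sets and returns the corresponding signals; the emulator callable and array names are illustrative placeholders, not the repository's actual globalemu interface.

import numpy as np

def compute_signals_in_batches(emulator, parameters, batch_size=100_000):
    # Evaluating every parameter set at once can exhaust memory, so the
    # parameter array is split into chunks of at most batch_size rows and
    # the resulting signal batches are concatenated at the end.
    signal_batches = []
    for start in range(0, parameters.shape[0], batch_size):
        batch = parameters[start:start + batch_size]
        signal_batches.append(emulator(batch))  # placeholder emulator call
    return np.concatenate(signal_batches, axis=0)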
ThomasGesseyJones committed Sep 5, 2023
1 parent 3f58236 commit 07d12b4
Showing 9 changed files with 520 additions and 323 deletions.
2 changes: 1 addition & 1 deletion README.rst
@@ -7,7 +7,7 @@ Introduction

:Name: FullyBayesianForecastsExample
:Author: Thomas Gessey-Jones
:Version: 0.0.8
:Version: 0.0.9
:Homepage: https://github.com/ThomasGesseyJones/FullyBayesianForecastsExample
:Paper: TBD

2 changes: 1 addition & 1 deletion configuration.yaml
@@ -58,7 +58,7 @@ priors:
# Number of data sets generated from each model to use when verifying the
# network against PolyChord. Each method is used to evaluate log K and then
# the results are compared.
verification_data_sets_per_model: 50
verification_data_sets_per_model: 1000
#
#
# PLOTTING
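The configuration comment above describes the verification procedure: the same data sets are passed to the trained evidence network and to PolyChord, each produces an estimate of log K, and the two sets of estimates are compared. A hedged sketch of that comparison, assuming the EvidenceNetwork API defined below and a hypothetical polychord_log_k helper that wraps the nested-sampling runs:

import numpy as np

def compare_log_k(network, polychord_log_k, verification_data):
    # Network estimates for all verification data sets at once.
    network_estimates = np.squeeze(
        np.asarray(network.evaluate_log_bayes_ratio(verification_data)))
    # PolyChord estimates, one nested-sampling run per data set
    # (polychord_log_k is a hypothetical wrapper, not part of the repository).
    sampler_estimates = np.array(
        [polychord_log_k(data) for data in verification_data])
    # Per-data-set differences and an RMS summary statistic.
    differences = network_estimates - sampler_estimates
    return differences, np.sqrt(np.mean(differences ** 2))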
120 changes: 96 additions & 24 deletions evidence_networks/evidence_network.py
@@ -31,6 +31,9 @@ class EvidenceNetwork:
The function should take the number of data sets to simulate as input,
and it should return the data as an array and any parameters of
interest as a pandas dataframe.
alpha: float, default = 2.0
The exponent of the leaky parity-odd transformation.
See arXiv:2305.11241 for details.
Attributes
----------
@@ -48,7 +51,8 @@ class EvidenceNetwork:
The labels used to validate the network
"""

def __init__(self, simulator_0: Callable, simulator_1: Callable):
def __init__(self, simulator_0: Callable, simulator_1: Callable,
alpha: float = 2.0):
"""Initialize an EvidenceNetwork object.
Parameters
@@ -63,6 +67,9 @@ def __init__(self, simulator_0: Callable, simulator_1: Callable):
The function should take the number of data sets to simulate as
input, and it should return the data as an array and any parameters
of interest as a pandas dataframe.
alpha: float, default = 2.0
The exponent of the leaky parity-odd transformation used in the
loss function and output transformation.
"""
# Check models are compatible
sample_data_0, _ = simulator_0(1)
@@ -79,6 +86,7 @@ def __init__(self, simulator_0: Callable, simulator_1: Callable):
self.simulator_1 = simulator_1
self._data_size = sample_data_0.size
self.trained = False
self.alpha = alpha

# Attributes to be defined later
self.nn_model = None
@@ -163,7 +171,11 @@ def train(self,
train_data_samples_per_model: int = 1_000_000,
validation_data_samples_per_model: int = 200_000,
epochs: int = 10,
batch_size: int = 100) -> None:
batch_size: int = 100,
initial_learning_rate: float = 1e-4,
decay_steps: int = 1000,
decay_rate: float = 0.95,
roll_back: bool = False) -> None:
"""Train the Bayes ratio network.
Parameters
@@ -180,6 +192,15 @@
The number of epochs to train for
batch_size: int, default=100
The batch size to use for training
initial_learning_rate: float, default=1e-4
The initial learning rate to use for training
decay_steps: int, default=1000
The number of steps per learning rate decay
decay_rate: float, default=0.95
The rate of learning rate decay
roll_back: bool, default=False
Whether to roll back the network to the validation loss minimum at
the end of training
"""
# Set-up NN, default from arXiv:2305.11241 appendix if not given
if nn_model is None:
@@ -188,12 +209,12 @@

# Compile model, using details from arXiv:2305.11241
self.nn_model.compile(
loss=l_pop_exponential_loss,
loss=generate_l_pop_exponential_loss(self.alpha),
optimizer=keras.optimizers.Adam(
learning_rate=ExponentialDecay(
initial_learning_rate=1e-4,
decay_steps=1000,
decay_rate=0.95,
initial_learning_rate=initial_learning_rate,
decay_steps=decay_steps,
decay_rate=decay_rate,
)),
metrics=["accuracy"],
)
@@ -211,12 +232,46 @@
self.validation_labels = validation_labels_data

# Train model and set trained flag
self.nn_model.fit(sample_data, labels_data,
batch_size=batch_size,
epochs=epochs,
verbose=2,
validation_data=(validation_sample_data,
validation_labels_data))
if not roll_back:
self.nn_model.fit(sample_data, labels_data,
batch_size=batch_size,
epochs=epochs,
verbose=2,
validation_data=(validation_sample_data,
validation_labels_data))
self.trained = True
return

# Training with roll back: train for one epoch at a time and check
# validation loss after each epoch. If validation loss is lower than
# the previous minimum, save the weights. At the end of training, roll
# back to the weights with the lowest validation loss.
minimum_val_loss = np.inf
minimum_val_loss_weights = None
minimum_epoch_num = 0
for epoch_num in range(epochs):
# Train for one epoch
self.nn_model.fit(sample_data, labels_data,
batch_size=batch_size,
epochs=1,
verbose=2,
validation_data=(validation_sample_data,
validation_labels_data))

# Check validation loss against previous minimum, and save weights
# if new minimum
val_loss = self.nn_model.evaluate(validation_sample_data,
validation_labels_data,
verbose=0)[0]
if val_loss < minimum_val_loss:
minimum_val_loss = val_loss
minimum_val_loss_weights = self.nn_model.get_weights()
minimum_epoch_num = epoch_num + 1

# Roll back to weights with the lowest validation loss
print(f"Reverting to minimum validation loss model, which was after "
f"epoch {minimum_epoch_num}.")
self.nn_model.set_weights(minimum_val_loss_weights)
self.trained = True
return

@@ -240,7 +295,7 @@ def evaluate_log_bayes_ratio(self, data: np.ndarray) -> np.ndarray:
data = data.reshape(1, -1)

nn_output = self.nn_model(tf.constant(data), training=False)
return leaky_parity_odd_transformation(nn_output)
return leaky_parity_odd_transformation(nn_output, self.alpha)

def evaluate_bayes_ratio(self, data: np.ndarray) -> np.ndarray:
"""Evaluate the Bayes ratio between model 1 and 0.
@@ -280,7 +335,8 @@ def load(self, filename: str):
"""
self.nn_model = keras.models.load_model(
filename,
custom_objects={'l_pop_exponential_loss': l_pop_exponential_loss})
custom_objects={'l_pop_exponential_loss':
generate_l_pop_exponential_loss(self.alpha)})
self.trained = True

def blind_coverage_test(self,
@@ -414,20 +470,36 @@ def leaky_parity_odd_transformation(x: np.ndarray,
return x * k_backend.pow(k_backend.abs(x), alpha - 1) + x


def l_pop_exponential_loss(model_label: float, f_x: np.ndarray) -> np.ndarray:
"""l-POP-Exponential loss function from arxiv:2305.11241.
def generate_l_pop_exponential_loss(alpha: float = 2.0) -> Callable:
"""Generate the l-POP-Exponential loss function from arxiv:2305.11241.
Parameters
----------
model_label: float
The true value of the model label (either 0.0 or 1.0)
f_x: np.ndarray
The value output by network
alpha: float, default=2.0
The exponent of the leaky parity-odd transformation.
Returns
-------
loss: np.ndarray
The loss function value
l_pop_exponential_loss: Callable
The l-POP-Exponential loss function
"""
return k_backend.exp((0.5 - model_label) *
leaky_parity_odd_transformation(f_x))

def l_pop_exponential_loss(model_label: float,
f_x: np.ndarray) -> np.ndarray:
"""l-POP-Exponential loss function from arxiv:2305.11241.
Parameters
----------
model_label: float
The true value of the model label (either 0.0 or 1.0)
f_x: np.ndarray
The value output by the network
Returns
-------
loss: np.ndarray
The loss function value
"""
return k_backend.exp((0.5 - model_label) *
leaky_parity_odd_transformation(f_x, alpha))
return l_pop_exponential_loss
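For reference, the quantities implemented above can be written compactly. With raw network output f(x), model label s in {0, 1}, and exponent alpha, the leaky parity-odd transformation, the l-POP-Exponential loss, and the returned log Bayes ratio are

    g_{\alpha}(y) = y \, |y|^{\alpha - 1} + y
    \mathcal{L}(s, f(x)) = \exp\!\left[ \left( \tfrac{1}{2} - s \right) g_{\alpha}(f(x)) \right]
    \log K = g_{\alpha}(f(x))

matching leaky_parity_odd_transformation, generate_l_pop_exponential_loss, and evaluate_log_bayes_ratio respectively.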

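A short usage sketch of the interface extended in this commit, based on the constructor and train() signatures shown above; the simulator functions, data shapes, import path, and hyperparameter values are illustrative placeholders, not the repository's actual simulators or the values chosen by the crude optimization.

import numpy as np
import pandas as pd
from evidence_networks.evidence_network import EvidenceNetwork  # import path assumed

def simulator_noise_only(n):
    # Placeholder no-signal model: pure Gaussian noise, no parameters.
    return np.random.normal(size=(n, 50)), pd.DataFrame(index=range(n))

def simulator_with_signal(n):
    # Placeholder signal model: noise plus a constant offset parameter.
    amplitude = np.random.uniform(0.0, 0.2, size=n)
    data = np.random.normal(size=(n, 50)) + amplitude[:, None]
    return data, pd.DataFrame({"amplitude": amplitude})

en = EvidenceNetwork(simulator_noise_only, simulator_with_signal, alpha=2.0)
en.train(train_data_samples_per_model=100_000,
         validation_data_samples_per_model=20_000,
         epochs=20,
         batch_size=1_000,
         initial_learning_rate=1e-3,
         decay_steps=1_000,
         decay_rate=0.95,
         roll_back=True)  # keep the weights from the lowest validation loss epoch
log_k = en.evaluate_log_bayes_ratio(simulator_with_signal(10)[0])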