Crude Optimization of Hyperparameters (#9)
* Added fbf_utilities.py with commonly used functions to avoid import loops

* Increased the number of PolyChord verification runs to 1000
* Moved responsibility for verification plotting to train_evidence_networks.py
* Refined verification plot presentation
* Added additional verification statistics

* Added rolling back to validation loss minimum
* Crude optimization of the number of epochs and batch size
* Crude optimization of decay length, initial learning rate, and number of data sets to train on 

* Increased the default noise to be in line with the REACH pessimistic scenario
* globalemu signals are now computed in batches of 100_000 to avoid memory errors (see the sketch below)
* Style changes to match the APS column width

* Version bump
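A minimal sketch of the batching idea from the globalemu bullet above, assuming an emulator that can be called on an array of parameter sets and returns the corresponding signals; the emulator callable and array names are illustrative placeholders, not the repository's actual globalemu interface.

import numpy as np

def compute_signals_in_batches(emulator, parameters, batch_size=100_000):
    # Evaluating every parameter set at once can exhaust memory, so the
    # parameter array is split into chunks of at most batch_size rows and
    # the resulting signal batches are concatenated at the end.
    signal_batches = []
    for start in range(0, parameters.shape[0], batch_size):
        batch = parameters[start:start + batch_size]
        signal_batches.append(emulator(batch))  # placeholder emulator call
    return np.concatenate(signal_batches, axis=0)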
ThomasGesseyJones committed Sep 5, 2023
1 parent 3f58236 commit 07d12b4
Showing 9 changed files with 520 additions and 323 deletions.
2 changes: 1 addition & 1 deletion README.rst
@@ -7,7 +7,7 @@ Introduction

:Name: FullyBayesianForecastsExample
:Author: Thomas Gessey-Jones
:Version: 0.0.8
:Version: 0.0.9
:Homepage: https://github.com/ThomasGesseyJones/FullyBayesianForecastsExample
:Paper: TBD

2 changes: 1 addition & 1 deletion configuration.yaml
@@ -58,7 +58,7 @@ priors:
# Number of data sets generated from each model to use when verifying the
# network against PolyChord. Each method is used to evaluate log K and then
# the results are compared.
verification_data_sets_per_model: 50
verification_data_sets_per_model: 1000
#
#
# PLOTTING
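The configuration comment above describes the verification procedure: the same data sets are passed to the trained evidence network and to PolyChord, each produces an estimate of log K, and the two sets of estimates are compared. A hedged sketch of that comparison, assuming the EvidenceNetwork API defined below and a hypothetical polychord_log_k helper that wraps the nested-sampling runs:

import numpy as np

def compare_log_k(network, polychord_log_k, verification_data):
    # Network estimates for all verification data sets at once.
    network_estimates = np.squeeze(
        np.asarray(network.evaluate_log_bayes_ratio(verification_data)))
    # PolyChord estimates, one nested-sampling run per data set
    # (polychord_log_k is a hypothetical wrapper, not part of the repository).
    sampler_estimates = np.array(
        [polychord_log_k(data) for data in verification_data])
    # Per-data-set differences and an RMS summary statistic.
    differences = network_estimates - sampler_estimates
    return differences, np.sqrt(np.mean(differences ** 2))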
120 changes: 96 additions & 24 deletions evidence_networks/evidence_network.py
@@ -31,6 +31,9 @@ class EvidenceNetwork:
The function should take the number of data sets to simulate as input,
and it should return the data as an array and any parameters of
interest as a pandas dataframe.
alpha: float, default = 2.0
The exponent of the leaky parity-odd transformation.
See arXiv:2305.11241 for details.
Attributes
----------
@@ -48,7 +51,8 @@ class EvidenceNetwork:
The labels used to validate the network
"""

def __init__(self, simulator_0: Callable, simulator_1: Callable):
def __init__(self, simulator_0: Callable, simulator_1: Callable,
alpha: float = 2.0):
"""Initialize an EvidenceNetwork object.
Parameters
@@ -63,6 +67,9 @@ def __init__(self, simulator_0: Callable, simulator_1: Callable):
The function should take the number of data sets to simulate as
input, and it should return the data as an array and any parameters
of interest as a pandas dataframe.
alpha: float, default = 2.0
The exponent of the leaky parity-odd transformation used in the
loss function and output transformation.
"""
# Check models are compatible
sample_data_0, _ = simulator_0(1)
@@ -79,6 +86,7 @@ def __init__(self, simulator_0: Callable, simulator_1: Callable):
self.simulator_1 = simulator_1
self._data_size = sample_data_0.size
self.trained = False
self.alpha = alpha

# Attributes to be defined later
self.nn_model = None
@@ -163,7 +171,11 @@ def train(self,
train_data_samples_per_model: int = 1_000_000,
validation_data_samples_per_model: int = 200_000,
epochs: int = 10,
batch_size: int = 100) -> None:
batch_size: int = 100,
initial_learning_rate: float = 1e-4,
decay_steps: int = 1000,
decay_rate: float = 0.95,
roll_back: bool = False) -> None:
"""Train the Bayes ratio network.
Parameters
@@ -180,6 +192,15 @@
The number of epochs to train for
batch_size: int, default=100
The batch size to use for training
initial_learning_rate: float, default=1e-4
The initial learning rate to use for training
decay_steps: int, default=1000
The number of steps per learning rate decay
decay_rate: float, default=0.95
The rate of learning rate decay
roll_back: bool, default=False
Whether to roll back the network to the validation loss minimum at
the end of training
"""
# Set-up NN, default from arXiv:2305.11241 appendix if not given
if nn_model is None:
@@ -188,12 +209,12 @@

# Compile model, using details from arXiv:2305.11241
self.nn_model.compile(
loss=l_pop_exponential_loss,
loss=generate_l_pop_exponential_loss(self.alpha),
optimizer=keras.optimizers.Adam(
learning_rate=ExponentialDecay(
initial_learning_rate=1e-4,
decay_steps=1000,
decay_rate=0.95,
initial_learning_rate=initial_learning_rate,
decay_steps=decay_steps,
decay_rate=decay_rate,
)),
metrics=["accuracy"],
)
@@ -211,12 +232,46 @@
self.validation_labels = validation_labels_data

# Train model and set trained flag
self.nn_model.fit(sample_data, labels_data,
batch_size=batch_size,
epochs=epochs,
verbose=2,
validation_data=(validation_sample_data,
validation_labels_data))
if not roll_back:
self.nn_model.fit(sample_data, labels_data,
batch_size=batch_size,
epochs=epochs,
verbose=2,
validation_data=(validation_sample_data,
validation_labels_data))
self.trained = True
return

# Training with roll back: train for one epoch at a time and check
# validation loss after each epoch. If validation loss is lower than
# the previous minimum, save the weights. At the end of training, roll
# back to the weights with the lowest validation loss.
minimum_val_loss = np.inf
minimum_val_loss_weights = None
minimum_epoch_num = 0
for epoch_num in range(epochs):
# Train for one epoch
self.nn_model.fit(sample_data, labels_data,
batch_size=batch_size,
epochs=1,
verbose=2,
validation_data=(validation_sample_data,
validation_labels_data))

# Check validation loss against previous minimum, and save weights
# if new minimum
val_loss = self.nn_model.evaluate(validation_sample_data,
validation_labels_data,
verbose=0)[0]
if val_loss < minimum_val_loss:
minimum_val_loss = val_loss
minimum_val_loss_weights = self.nn_model.get_weights()
minimum_epoch_num = epoch_num + 1

# Roll back to weights with the lowest validation loss
print(f"Reverting to minimum validation loss model, which was after "
f"epoch {minimum_epoch_num}.")
self.nn_model.set_weights(minimum_val_loss_weights)
self.trained = True
return

@@ -240,7 +295,7 @@ def evaluate_log_bayes_ratio(self, data: np.ndarray) -> np.ndarray:
data = data.reshape(1, -1)

nn_output = self.nn_model(tf.constant(data), training=False)
return leaky_parity_odd_transformation(nn_output)
return leaky_parity_odd_transformation(nn_output, self.alpha)

def evaluate_bayes_ratio(self, data: np.ndarray) -> np.ndarray:
"""Evaluate the Bayes ratio between model 1 and 0.
@@ -280,7 +335,8 @@ def load(self, filename: str):
"""
self.nn_model = keras.models.load_model(
filename,
custom_objects={'l_pop_exponential_loss': l_pop_exponential_loss})
custom_objects={'l_pop_exponential_loss':
generate_l_pop_exponential_loss(self.alpha)})
self.trained = True

def blind_coverage_test(self,
@@ -414,20 +470,36 @@ def leaky_parity_odd_transformation(x: np.ndarray,
return x * k_backend.pow(k_backend.abs(x), alpha - 1) + x


def l_pop_exponential_loss(model_label: float, f_x: np.ndarray) -> np.ndarray:
"""l-POP-Exponential loss function from arxiv:2305.11241.
def generate_l_pop_exponential_loss(alpha: float = 2.0) -> Callable:
"""Generate the l-POP-Exponential loss function from arxiv:2305.11241.
Parameters
----------
model_label: float
The true value of the model label (either 0.0 or 1.0)
f_x: np.ndarray
The value output by network
alpha: float, default=2.0
The exponent of the leaky parity-odd transformation.
Returns
-------
loss: np.ndarray
The loss function value
l_pop_exponential_loss: Callable
The l-POP-Exponential loss function
"""
return k_backend.exp((0.5 - model_label) *
leaky_parity_odd_transformation(f_x))

def l_pop_exponential_loss(model_label: float,
f_x: np.ndarray) -> np.ndarray:
"""l-POP-Exponential loss function from arxiv:2305.11241.
Parameters
----------
model_label: float
The true value of the model label (either 0.0 or 1.0)
f_x: np.ndarray
The value output by the network
Returns
-------
loss: np.ndarray
The loss function value
"""
return k_backend.exp((0.5 - model_label) *
leaky_parity_odd_transformation(f_x, alpha))
return l_pop_exponential_loss
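For reference, the quantities implemented above can be written compactly. With raw network output f(x), model label s in {0, 1}, and exponent alpha, the leaky parity-odd transformation, the l-POP-Exponential loss, and the returned log Bayes ratio are

    g_{\alpha}(y) = y \, |y|^{\alpha - 1} + y
    \mathcal{L}(s, f(x)) = \exp\!\left[ \left( \tfrac{1}{2} - s \right) g_{\alpha}(f(x)) \right]
    \log K = g_{\alpha}(f(x))

matching leaky_parity_odd_transformation, generate_l_pop_exponential_loss, and evaluate_log_bayes_ratio respectively.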

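A short usage sketch of the interface extended in this commit, based on the constructor and train() signatures shown above; the simulator functions, data shapes, import path, and hyperparameter values are illustrative placeholders, not the repository's actual simulators or the values chosen by the crude optimization.

import numpy as np
import pandas as pd
from evidence_networks.evidence_network import EvidenceNetwork  # import path assumed

def simulator_noise_only(n):
    # Placeholder no-signal model: pure Gaussian noise, no parameters.
    return np.random.normal(size=(n, 50)), pd.DataFrame(index=range(n))

def simulator_with_signal(n):
    # Placeholder signal model: noise plus a constant offset parameter.
    amplitude = np.random.uniform(0.0, 0.2, size=n)
    data = np.random.normal(size=(n, 50)) + amplitude[:, None]
    return data, pd.DataFrame({"amplitude": amplitude})

en = EvidenceNetwork(simulator_noise_only, simulator_with_signal, alpha=2.0)
en.train(train_data_samples_per_model=100_000,
         validation_data_samples_per_model=20_000,
         epochs=20,
         batch_size=1_000,
         initial_learning_rate=1e-3,
         decay_steps=1_000,
         decay_rate=0.95,
         roll_back=True)  # keep the weights from the lowest validation loss epoch
log_k = en.evaluate_log_bayes_ratio(simulator_with_signal(10)[0])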