In [1]:
# This notebook contains code for executing the tasks described (and depicted) in
# Tutorial Step 2: Initializing the Classifier

# First, replicate the steps of Tutorial Step 1:
import sympy as sym
from sympy import I
from sympy.physics.quantum.dagger import Dagger

from bfbrain import DataManager

# Write a SymPy function representing the scalar potential.
def V_2HDM(phi, lam):
    """Return the quartic scalar potential as a SymPy expression.

    Two two-component fields are assembled from ``phi``: the first is
    ``(0, phi[0])`` and the second is
    ``(phi[1] + i*phi[3], phi[2] + i*phi[4])``. Ten quartic combinations
    of their bilinears are simplified and contracted with ``lam``.

    Parameters
    ----------
    phi : indexable
        Field components; entries 0 through 4 are read.
        Presumably real SymPy symbols -- TODO confirm against the caller,
        since the conjugation performed by ``Dagger`` depends on the
        symbols' assumptions.
    lam : sequence or SymPy matrix
        Coefficients dotted with the ten-entry vector of quartic terms,
        so it should supply ten entries.

    Returns
    -------
    The value of ``Matrix.dot`` between the simplified quartic terms and
    ``lam``.
    """
    doublet_1 = sym.Matrix([0, phi[0]])
    doublet_2 = sym.Matrix([phi[1] + I*phi[3], phi[2] + I*phi[4]])

    # Bilinears of the two fields. The first norm is left unsimplified
    # here; every entry is simplified again when the vector is built below.
    norm_1 = Dagger(doublet_1).dot(doublet_1)
    norm_2 = sym.simplify(Dagger(doublet_2).dot(doublet_2))
    cross_12 = sym.simplify(Dagger(doublet_1).dot(doublet_2))
    cross_21 = sym.simplify(Dagger(doublet_2).dot(doublet_1))

    # Combinations of the cross terms shared by the last four entries.
    cross_sum = cross_12 + cross_21
    cross_diff = cross_12 - cross_21

    quartic_terms = [
        (norm_1**2)/2,
        (norm_2**2)/2,
        norm_1*norm_2,
        cross_12*cross_21,
        (cross_12**2 + cross_21**2)/2,
        I*(cross_12**2 - cross_21**2)/2,
        norm_1*cross_sum,
        I*norm_1*cross_diff,
        norm_2*cross_sum,
        I*norm_2*cross_diff,
    ]
    simplified_terms = sym.Matrix(quartic_terms).applyfunc(sym.simplify)
    return simplified_terms.dot(lam)

# Initialize a DataManager object which will handle
# data generation and oracle labelling.
# V_2HDM reads phi[0]..phi[4] and builds ten quartic terms, so the 5 and 10
# below presumably give the number of field components and the number of
# quartic coefficients -- TODO confirm against DataManager.from_func.
# NOTE(review): the meaning of niter is not visible in this notebook.
dm = DataManager.from_func(V_2HDM, 5, 10, niter = 100)

In [2]:
# Build the BFBLearner: 5 hidden layers of 128 neurons each, 1000 initial
# training points, and two performance metrics --
#   * the F score on a labelled validation set, and
#   * the estimated change in F score between active learning iterations,
#     evaluated on a larger but unlabelled set.
from bfbrain import BFBLearner, ValidationFScore, UnlabelledDeltaF

# This cell should take a few minutes to run, due to the need to label the validation set.
# The metrics are constructed in the same order as they appear in the list
# handed to the learner.
labelled_f_score = ValidationFScore()
unlabelled_lams = dm.create_random_lambdas(1_000_000, validation = True)
delta_f_estimate = UnlabelledDeltaF(unlabelled_lams)

AL = BFBLearner.init_for_first_run(
    dm,
    5,
    128,
    [labelled_f_score, delta_f_estimate],
    1000,
)

# Keep the untrained learner on disk for later use. A version of this saved
# object is included in the examples folder already.
AL.save_AL_state('saved_AL_untrained')

creating training data...
recompiling vectorized_minTest...
done!
creating validation data...
recompiling vectorized_minTest...
done!


In [3]:
# Demonstrate loading a saved BFBLearner and adjusting its hyperparameters.
# This cell relies on the previous one: BFBLearner is imported there, and
# 'saved_AL_untrained' is the name that cell passes to save_AL_state.
from bfbrain import MCModelEvaluation

# Load the BFBLearner we just saved. It should be a copy of AL.
loaded_AL = BFBLearner.from_file('saved_AL_untrained')

# Redefine the model to feature 3 layers of 256 neurons instead of 5 layers of 128 neurons.
loaded_AL.redefine_model(3, 256)

# Adjust the prior length scale l (weights have a Bayesian prior of N(0, 1/l**2)).
loaded_AL.set_l_constant(0.01)

# Add a new performance metric to the ones the learner was created with.
loaded_AL.add_metrics(MCModelEvaluation())

In [4]:
# Advanced Usage: Custom Performance Metrics

# This is an example of a custom performance metric, which may be included in the list of
# performance metrics when initializing a BFBLearner. For more information, see
# the relevant section of the tutorial.

from bfbrain import TrainMetric
import numpy as np

class TrainPosFraction(TrainMetric):
    """
    Custom metric reporting, for each newly-added set of
    training points, the fraction that the oracle labels
    as positive.
    """

    def __init__(self, name = 'pos_fraction'):
        """Forward the metric's name (default 'pos_fraction') to TrainMetric."""
        super().__init__(name = name)

    def performance_check(self, model, lams, labels):
        """Concrete implementation of the parent class's
        performance_check hook; the returned value is what
        gets recorded in the metric's status_history object.

        The signature is fixed by the parent class, so model
        and lams are accepted even though only labels is used.

        Returns the number of nonzero entries in labels divided
        by len(labels). An empty labels makes the denominator
        zero; that case is not guarded here.
        """
        n_positive = np.count_nonzero(labels)
        n_total = len(labels)
        return n_positive / n_total