In [2]:
%load_ext autoreload
%autoreload 2

import kerastuner as kt
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras

import surrogate_models.dab_nn_defs as engine
import utils
from visualization.simple_data_vis import histograms


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [3]:
# load data from just-private/data
# `filename` is reused by later cells to build the model names ('PBNN_' + filename),
# so changing the scan file here also renames the models.
filename = 'mol_res_scan_results_7.csv'
data = utils.load_data(filename)

In [None]:
# _,_ = histograms(data)

In [18]:
# `data` is a single dataframe: columns 2 and 3 are the targets (purity, yield),
# and every remaining column is treated as a model input.
y = data.columns[2:4]
x = list(data.columns[:2]) + list(data.columns[4:])

# partition the rows into training and held-out test sets
train_x, test_x, train_y, test_y = utils.chroma_train_test_split(data, x, y)

In [19]:
# Preprocess the train and test features in one call, requesting standardisation
# (the keyword is spelled `standarize` in utils.preprocessing) and passing
# 'cut 1' / 'cut 2' through `skip` -- presumably columns that are left unscaled; confirm in utils.
# The results overwrite train_x / test_x, so the unscaled frames are no longer available afterwards.
train_x, test_x = utils.preprocessing([train_x, test_x], standarize = True, skip = ['cut 1','cut 2'])

In [None]:
# define Probabilistic Bayesian Neural Network
# NOTE(review): `train_size` is never assigned in the visible cells; it must exist in the
# kernel before this cell runs (presumably the number of training rows -- confirm).
# NOTE(review): FEATURE_NAMES uses data.columns[4:] only, while the model inputs `x`
# selected for the train/test split also include data.columns[:2] -- confirm this is intended.
prob_bnn_model = engine.create_probablistic_bnn_model(
    FEATURE_NAMES = data.columns[4:],
    TARGET_NAMES = data.columns[2:4], 
    train_size = train_size, 
    # `y` is a 1-D column Index (data.columns[2:4]); its length is the number of targets.
    # `y.shape[1]` would raise IndexError because the shape tuple has a single entry.
    n_outputs = len(y),
    hidden_units = [16,8,4],
    name = 'PBNN_'+filename
    )

# specify train/test routine 
# NOTE(review): `train_dataset` / `test_dataset` are not built in any visible cell.
engine.run_experiment(
    model = prob_bnn_model, 
    # the loss lives in the `engine` module (the tuner cell references it the same way);
    # the bare name is not defined in this notebook
    loss = engine.negative_loglikelihood, 
    learning_rate = 0.05,
    num_epochs = 400,
    train_dataset = train_dataset, 
    test_dataset = test_dataset
    )

In [None]:
# Draw one shuffled batch of `n_samples` held-out examples to inspect predictions on.
# NOTE(review): `test_dataset` and `dataset_size` are not defined in any visible cell.
# NOTE(review): the shuffle is unseeded, so the sampled batch differs between runs.
n_samples = 50
# next(iter(...)) pulls only the first batch; wrapping the pipeline in list(...) and
# indexing [0] iterated over every batch just to throw all but one away.
sample_inputs, sample_outputs = next(iter(test_dataset.unbatch().shuffle(dataset_size).batch(n_samples)))

In [None]:
def model_builder(hp):
    """Build and compile one candidate probabilistic BNN for a Keras Tuner trial.

    Hyperparameters registered on ``hp``:
      * ``layers``        -- number of hidden layers, 2 to 10
      * ``units``         -- width of the first (widest) hidden layer, 4 to 32 in steps of 2
      * ``learning_rate`` -- RMSprop learning rate, one of 1e-2, 1e-3, 1e-4

    The hidden-layer widths taper geometrically from ``units`` downwards:
    layer ``k`` (0-based) gets ``ceil(units ** ((layers - k) / layers))`` units,
    e.g. layers=4, units=16 -> [16, 8, 4, 2].

    Reads the notebook globals ``data``, ``filename``, ``train_size`` and ``y``.
    NOTE(review): ``train_size`` is not assigned in any visible cell.

    Args:
        hp: hyperparameter container supplied by the tuner; must provide
            ``Int`` and ``Choice``.

    Returns:
        The model returned by ``engine.create_probablistic_bnn_model`` after
        ``compile`` has been called on it.
    """
    n_layers = hp.Int('layers', min_value=2, max_value=10)
    max_units = hp.Int('units', min_value=4, max_value = 32, step=2)

    # One width per requested layer, widest first, as plain ints.
    # The previous loop rebuilt the list on every pass and kept only the last one,
    # which had `layers - 1` float entries starting at a width of 1.
    # Rounding before ceil stops float noise (e.g. 8.000000000000002) from bumping
    # an exact power up to the next integer.
    hidden_units = [
        int(np.ceil(round(max_units ** ((n_layers - depth) / n_layers), 6)))
        for depth in range(n_layers)
    ]

    # define Probabilistic Bayesian Neural Network 
    model = engine.create_probablistic_bnn_model(
        FEATURE_NAMES = data.columns[4:],
        TARGET_NAMES = data.columns[2:4], 
        train_size = train_size, 
        # `y` is a 1-D column Index, so len(y) is the target count (y.shape[1] raises IndexError)
        n_outputs = len(y),
        hidden_units = hidden_units,
        name = 'PBNN_'+filename
        )

    model.compile(
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss=engine.negative_loglikelihood,
        metrics=[tf.keras.metrics.MeanSquaredError()]
    )

    return model

In [None]:
# Hyperband tuner over the search space declared in `model_builder`,
# using 'val_loss' as the objective.
# `directory` / `project_name` are passed through to Keras Tuner -- presumably where
# trial results are stored and resumed from; confirm against the Keras Tuner docs.
# NOTE(review): 'intro_to_kt' looks like a leftover tutorial name; a project-specific
# name would avoid reusing unrelated cached trials.
tuner = kt.Hyperband(model_builder,
                     objective='val_loss',
                     max_epochs=400,
                     factor=2,
                     directory="surrogate_models/.hypertuning/",
                     project_name='intro_to_kt')

In [None]:
# Print the search space registered by model_builder ('layers', 'units', 'learning_rate').
tuner.search_space_summary()

In [None]:
# Run the hyperparameter search, validating every trial on `test_dataset`.
# NOTE(review): Hyperband schedules the epoch count of each trial itself (bounded by
# max_epochs on the tuner), so `epochs = 100` here likely has no effect -- confirm.
# NOTE(review): using the test set as validation data lets it influence model selection;
# a separate validation split would keep the test set untouched.
tuner.search(train_dataset, validation_data = test_dataset, epochs = 100)

In [None]:
# Retrieve the tuner's best model(s); called with default arguments, and only
# `best[0]` is used by the following cell.
best = tuner.get_best_models()

In [None]:
# Run the top-ranked model on the sampled inputs, then reduce each returned
# object to its mean, converted to a plain Python list.
best_distributions = best[0](sample_inputs)
best_means = [dist.mean().numpy().tolist() for dist in best_distributions]


In [None]:
# Display the predicted means of the best model on the sampled inputs.
best_means

In [None]:
# Second PBNN with a hand-picked, wider architecture ([512, 32]) for comparison.
# NOTE(review): `train_size`, `train_dataset` and `test_dataset` are not defined in any visible cell.
test_pbnn_model = engine.create_probablistic_bnn_model(
    FEATURE_NAMES = data.columns[4:],
    TARGET_NAMES = data.columns[2:4], 
    train_size = train_size, 
    # `y` is a 1-D column Index (data.columns[2:4]); its length is the number of targets.
    # `y.shape[1]` would raise IndexError because the shape tuple has a single entry.
    n_outputs = len(y),
    hidden_units = [512,32],
    name = 'test_PBNN_'+filename
    )

# specify train/test routine 
# NOTE(review): this run trains with mean squared error, unlike the other PBNN runs,
# which use engine.negative_loglikelihood -- confirm this is deliberate.
engine.run_experiment(
    model = test_pbnn_model, 
    loss = keras.losses.MeanSquaredError(),
    learning_rate = 0.005,
    num_epochs = 300,
    train_dataset = train_dataset, 
    test_dataset = test_dataset,
    verbose = 1
    )

In [None]:
import matplotlib.pyplot as plt

# Call the model N times on the same inputs and stack the results along a new last axis,
# so statistics over that axis summarise the spread between calls.
N = 10
out = tf.stack([test_pbnn_model(sample_inputs) for _ in range(N)], -1)

# Parity plot: true value on x, mean over the N calls on y, standard deviation as error bar.
colors = ['k','r']
true_values = list(sample_outputs.values())
for i, color in enumerate(colors):
    draws = out[i,:,:,:].numpy()
    plt.errorbar(
        true_values[i].numpy(),
        draws.mean(-1),
        yerr = draws.std(-1).squeeze(),
        label = data.columns[2:4][i],
        marker = 'o',
        color = color,
        alpha = 0.5,
        ls = 'none',
    )

# y = x reference line: points on it are perfect predictions
plt.plot([0,1],[0,1],'k',alpha=0.25)
plt.legend(loc='lower right')
plt.xlim(0,1)
plt.ylim(0,1)
plt.gca().set_aspect('equal')
plt.xlabel('true')
plt.ylabel('predicted')


In [None]:
# Display the full stacked tensor of the N model calls (this prints every value).
out