# Distributed random-search hyper-parameter optimization of the Keras RPV classifier

In [1]:
# System imports
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import os

# External imports
import ipyparallel as ipp
import numpy as np
import matplotlib.pyplot as plt

# Local imports
from rpv import load_dataset

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Connect to cluster

In [2]:
# Connect to the ipyparallel cluster through the default profile (for now)
c = ipp.Client()
worker_ids = c.ids
print('Worker IDs:', worker_ids)

Worker IDs: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]


In [3]:
# Data config
# Number of samples requested for each split; the trailing numbers are
# presumably the full-dataset sizes (TODO confirm).
n_train = 32000 #412416
n_valid = 16000 #137471
n_test = 16000 #137471
# Dataset location. Defaults to the original scratch directory, but can be
# redirected with the RPV_INPUT_DIR environment variable so the notebook
# also runs for other users / on other systems without editing this cell.
input_dir = os.environ.get('RPV_INPUT_DIR',
                           '/global/cscratch1/sd/sfarrell/atlas-rpv-images')

In [4]:
# Load the three splits, then unpack each one into (input, labels, weights)
train_set, valid_set, test_set = load_dataset(input_dir, n_train, n_valid, n_test)
train_input, train_labels, train_weights = train_set
valid_input, valid_labels, valid_weights = valid_set
test_input, test_labels, test_weights = test_set

In [5]:
# Report the array shape and the mean label of every split
for tag, split_input, split_labels in (('train shape:', train_input, train_labels),
                                       ('valid shape:', valid_input, valid_labels),
                                       ('test shape: ', test_input, test_labels)):
    print(tag, split_input.shape, 'Mean label:', split_labels.mean())

train shape: (32000, 64, 64, 1) Mean label: 0.42815625
valid shape: (16000, 64, 64, 1) Mean label: 0.4200625
test shape:  (16000, 64, 64, 1) Mean label: 0.431375


## Configure the hyper-parameter search tasks

In [6]:
# Temporarily making things reproducible for development
np.random.seed(0)

# Define the hyper-parameter search points: one independent random draw per
# trial for every hyper-parameter.
# NOTE: with a fixed seed, the values each trial receives depend on the order
# of the np.random calls below, so do not reorder them.
n_hpo_trials = 64
h1 = np.random.choice([4, 8, 16, 32, 64], size=n_hpo_trials)
h2 = np.random.choice([4, 8, 16, 32, 64], size=n_hpo_trials)
h3 = np.random.choice([8, 16, 32, 64, 128], size=n_hpo_trials)
# One row per trial, one column per h1/h2/h3 draw
conv_sizes = np.stack([h1, h2, h3], axis=1)
# Shape (n_hpo_trials, 1): a single FC size per trial
fc_sizes = np.random.choice([32, 64, 128, 256], size=(n_hpo_trials, 1))
lr = np.random.choice([0.0001, 0.001, 0.01], size=n_hpo_trials)
# Uniform in [0, 1)
dropout = np.random.rand(n_hpo_trials)
optimizer = np.random.choice(['Adadelta', 'Adam', 'Nadam'], size=n_hpo_trials)

# Training config
batch_size = 64
n_epochs = 8
# Checkpoint location. Defaults to the original scratch directory, but can be
# redirected with the RPV_CHECKPOINT_DIR environment variable so the notebook
# is usable without editing a user-specific absolute path.
checkpoint_dir = os.environ.get(
    'RPV_CHECKPOINT_DIR',
    '/global/cscratch1/sd/sfarrell/cori-interactive-dl/rpv_hpo')

## Run the hyper-parameter search

In [7]:
def build_and_train(train_input, train_labels, valid_input, valid_labels,
                    conv_sizes, fc_sizes, dropout, optimizer, lr,
                    batch_size, n_epochs, checkpoint_file=None, verbose=2):
    """Build one model and train it with a single hyper-parameter set.

    The `rpv` module is imported inside the body so the import is resolved
    in whichever process executes the function (the notebook submits it to
    the cluster workers with `lv.apply`).

    The model input shape is `train_input.shape[1:]`. `checkpoint_file` and
    `verbose` are forwarded unchanged to `rpv.train_model`.

    Returns the `history` attribute of the object returned by
    `rpv.train_model`.
    """
    import rpv

    # Model construction: everything after the first axis is the input shape
    input_shape = train_input.shape[1:]
    model = rpv.build_model(input_shape,
                            conv_sizes=conv_sizes, fc_sizes=fc_sizes,
                            dropout=dropout, optimizer=optimizer, lr=lr)

    # Training
    training = rpv.train_model(model,
                               train_input=train_input, train_labels=train_labels,
                               valid_input=valid_input, valid_labels=valid_labels,
                               batch_size=batch_size, n_epochs=n_epochs,
                               checkpoint_file=checkpoint_file, verbose=verbose)
    return training.history

In [8]:
# Load-balanced view
lv = c.load_balanced_view()

# Submit one asynchronous training task per hyper-parameter set and keep the
# returned handles; nothing here waits for the tasks to finish.
results = []
for ihp in range(n_hpo_trials):
    # Hyper-parameters of this trial, gathered once and reused for both the
    # log lines and the task submission
    trial_params = dict(conv_sizes=conv_sizes[ihp], fc_sizes=fc_sizes[ihp],
                        dropout=dropout[ihp], optimizer=optimizer[ihp],
                        lr=lr[ihp])
    print('Hyper-parameter trial', ihp)
    print('  Conv sizes', trial_params['conv_sizes'])
    print('  FC sizes', trial_params['fc_sizes'])
    print('  Dropout %.4f' % trial_params['dropout'])
    print('  Optimizer', trial_params['optimizer'])
    print('  LearningRate', trial_params['lr'])
    # Each trial writes to its own checkpoint file
    checkpoint_file = os.path.join(checkpoint_dir, 'model_%i.h5' % ihp)
    result = lv.apply(build_and_train,
                      train_input, train_labels, valid_input, valid_labels,
                      batch_size=batch_size, n_epochs=n_epochs,
                      checkpoint_file=checkpoint_file,
                      **trial_params)
    results.append(result)

Hyper-parameter trial 0
  Conv sizes [64 64 32]
  FC sizes [32]
  Dropout 0.1856
  Optimizer Adam
  LearningRate 0.001
Hyper-parameter trial 1
  Conv sizes [ 4  8 64]
  FC sizes [128]
  Dropout 0.9528
  Optimizer Adam
  LearningRate 0.0001
Hyper-parameter trial 2
  Conv sizes [32 16  8]
  FC sizes [128]
  Dropout 0.6875
  Optimizer Adam
  LearningRate 0.0001
Hyper-parameter trial 3
  Conv sizes [ 32  64 128]
  FC sizes [256]
  Dropout 0.2155
  Optimizer Nadam
  LearningRate 0.0001
Hyper-parameter trial 4
  Conv sizes [32 32  8]
  FC sizes [64]
  Dropout 0.9474
  Optimizer Adadelta
  LearningRate 0.0001
Hyper-parameter trial 5
  Conv sizes [ 8 64  8]
  FC sizes [64]
  Dropout 0.7309
  Optimizer Adadelta
  LearningRate 0.0001
Hyper-parameter trial 6
  Conv sizes [32 64 32]
  FC sizes [32]
  Dropout 0.2539
  Optimizer Adadelta
  LearningRate 0.0001
Hyper-parameter trial 7
  Conv sizes [16 64 64]
  FC sizes [256]
  Dropout 0.2133
  Optimizer Adam
  LearningRate 0.001
Hyper-parameter trial 

In [17]:
# Display the AsyncResult handles to check the status of every submitted task
results

[<AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:finished>,
 <AsyncResult: build_and_train:fin

In [18]:
# Collect the return value of every task, in submission order
histories = []
for async_result in results:
    histories.append(async_result.get())

In [19]:
# Pick the handle of the first trial for a closer look
ar = results[0]

In [20]:
# Show the output captured while this task ran (the per-epoch training log)
ar.display_outputs()

Train on 32000 samples, validate on 16000 samples
Epoch 1/8
 - 52s - loss: 0.2764 - acc: 0.8855 - val_loss: 0.2330 - val_acc: 0.9058
Epoch 2/8
 - 51s - loss: 0.2409 - acc: 0.9058 - val_loss: 0.2319 - val_acc: 0.9057
Epoch 3/8
 - 51s - loss: 0.2374 - acc: 0.9044 - val_loss: 0.2246 - val_acc: 0.9076
Epoch 4/8
 - 51s - loss: 0.2293 - acc: 0.9080 - val_loss: 0.2276 - val_acc: 0.9080
Epoch 5/8
 - 51s - loss: 0.1829 - acc: 0.9270 - val_loss: 0.0816 - val_acc: 0.9690
Epoch 6/8
 - 51s - loss: 0.0762 - acc: 0.9728 - val_loss: 0.0572 - val_acc: 0.9793
Epoch 7/8
 - 51s - loss: 0.0557 - acc: 0.9805 - val_loss: 0.0428 - val_acc: 0.9852
Epoch 8/8
 - 51s - loss: 0.0503 - acc: 0.9824 - val_loss: 0.0464 - val_acc: 0.9841


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Model selection and evaluation

In [21]:
def draw_history(h):
    """Plot per-epoch training and validation curves side by side.

    `h` is indexed with the keys 'loss', 'val_loss', 'acc' and 'val_acc';
    each entry is plotted against its index (the epoch). The left panel shows
    the loss, the right panel the accuracy with the y axis fixed to (0, 1).
    """
    last_epoch = len(h['loss']) - 1
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(9,4))
    panels = (
        (loss_ax, 'loss', 'val_loss', 'Loss'),
        (acc_ax, 'acc', 'val_acc', 'Accuracy'),
    )
    for ax, train_key, valid_key, ylabel in panels:
        ax.plot(h[train_key], label='Training')
        ax.plot(h[valid_key], label='Validation')
        ax.set_xlim(0, last_epoch)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.legend(loc=0)
    # Only the accuracy panel gets a fixed y range
    acc_ax.set_ylim((0, 1))
    fig.tight_layout()

In [22]:
# Select the interactive notebook backend before any figure is drawn
%matplotlib notebook

In [23]:
# Per-trial validation accuracy curves (one value per epoch)
val_acc_curves = [h['val_acc'] for h in histories]
# Accuracy after the final epoch, and the best accuracy reached in any epoch
last_scores = np.array([curve[-1] for curve in val_acc_curves])
best_scores = np.array([max(curve) for curve in val_acc_curves])

In [24]:
# Find the best trial, ranked by the best (maximum over epochs) validation
# set accuracy -- not by the accuracy after the final epoch.
i = best_scores.argmax()
# The learning rate is printed with %g: the search space includes 0.0001,
# which a '%.3f' format would display as 0.000.
print('Hyperparameters: trial %i conv %s fc %s dropout %.3f opt %s, lr %g' %
      (i, conv_sizes[i], fc_sizes[i], dropout[i], optimizer[i], lr[i]))
print('  Last validation accuracy %.4f' % last_scores[i])
print('  Best validation accuracy %.4f' % best_scores[i])

draw_history(histories[i])

Hyperparameters: trial 13 conv [ 8 64 64] fc [256] dropout 0.464 opt Adam, lr 0.001
  Last validation accuracy 0.9801
  Best validation accuracy 0.9859


<IPython.core.display.Javascript object>

In [25]:
# Find the worst trial, ranked by the best (maximum over epochs) validation
# set accuracy -- not by the accuracy after the final epoch.
i = best_scores.argmin()
# The learning rate is printed with %g: the search space includes 0.0001,
# which a '%.3f' format would display as 0.000.
print('Hyperparameters: trial %i conv %s fc %s dropout %.3f opt %s, lr %g' %
      (i, conv_sizes[i], fc_sizes[i], dropout[i], optimizer[i], lr[i]))
print('  Last validation accuracy %.4f' % last_scores[i])
print('  Best validation accuracy %.4f' % best_scores[i])
draw_history(histories[i])

Hyperparameters: trial 6 conv [32 64 32] fc [32] dropout 0.254 opt Adadelta, lr 0.000
  Last validation accuracy 0.3240
  Best validation accuracy 0.4234


<IPython.core.display.Javascript object>

### Test set evaluation

Here we load the best selected model and evaluate final performance on the test set.

In [32]:
from sklearn import metrics

def summarize_metrics(labels, outputs, threshold=0.5, weights=None):
    """Print accuracy, purity (precision) and efficiency (recall).

    Predictions are obtained by comparing `outputs` against `threshold`
    (strictly greater than). `weights` is passed to every sklearn metric as
    `sample_weight`.
    """
    preds = outputs > threshold
    report = (
        ('Accuracy:   %.4f', metrics.accuracy_score),
        ('Purity:     %.4f', metrics.precision_score),
        ('Efficiency: %.4f', metrics.recall_score),
    )
    for template, score_fn in report:
        print(template % score_fn(labels, preds, sample_weight=weights))

def draw_roc(labels, outputs, weights=None, ax=None):
    """Draw the ROC curve of `outputs` against `labels`, plus a diagonal.

    `weights` is passed to the sklearn ROC and AUC functions as
    `sample_weight`. The curve is drawn on `ax`; when `ax` is None a new
    figure is created for it. The AUC is shown in the legend entry.
    """
    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(
        labels, outputs, sample_weight=weights)
    area = metrics.roc_auc_score(labels, outputs, sample_weight=weights)
    if ax is None:
        ax = plt.subplots()[1]
    curve_label = 'CNN, AUC=%.3f' % area
    ax.plot(false_pos_rate, true_pos_rate, label=curve_label)
    # Diagonal reference line, labelled 'Random'
    ax.plot([0, 1], [0, 1], '--', label='Random')
    ax.set_xlabel('False positive rate')
    ax.set_ylabel('True positive rate')
    ax.legend(loc=0)

In [26]:
# `keras` is used below, but no import in this notebook binds the name, so
# the cell fails with a NameError on a fresh kernel; import it here.
import keras

# Load the checkpoint written for the trial with the highest best-epoch
# validation accuracy.
i = best_scores.argmax()
model_file = os.path.join(checkpoint_dir, 'model_%i.h5' % i)
model = keras.models.load_model(model_file)

In [30]:
# Run the selected model on the test inputs and drop the trailing
# length-1 axis of the prediction array
test_output = model.predict(test_input).squeeze(-1)

In [33]:
# Report the test-set metrics twice: first unweighted, then with the
# per-sample test weights; the two reports are separated by a blank line.
reports = (('Unweighted metrics', None), ('Weighted metrics', test_weights))
for report_index, (title, sample_weights) in enumerate(reports):
    if report_index:
        print()
    print(title)
    summarize_metrics(test_labels, test_output, weights=sample_weights)

Unweighted metrics
Accuracy:   0.9809
Purity:     0.9624
Efficiency: 0.9946

Weighted metrics
Accuracy:   0.8945
Purity:     0.0005
Efficiency: 0.9946


In [41]:
# Side-by-side ROC curves (unweighted left, weighted right), both zoomed
# into the false-positive-rate range [0, 0.01]
fig, axs = plt.subplots(1, 2, figsize=(9,4))
for ax, title, sample_weights in zip(axs,
                                     ('Unweighted', 'Weighted'),
                                     (None, test_weights)):
    draw_roc(test_labels, test_output, ax=ax, weights=sample_weights)
    ax.set_xlim([0, 0.01])
    ax.set_title(title)

<IPython.core.display.Javascript object>