In [0]:
def get_dataset(num_classes, rank=0, size=1):
  """Load MNIST and return the slice of it selected by `rank` and `size`.

  Args:
    num_classes: Width of the one-hot label vectors.
    rank: Index of the first sample kept; also part of the download cache name.
    size: Stride between kept samples (rank=0, size=1 keeps every sample).

  Returns:
    ((x_train, y_train), (x_test, y_test)). Images are float32 arrays of shape
    (n, 28, 28, 1) divided by 255; labels are one-hot encoded.
  """
  from tensorflow import keras

  def _prepare(images, labels):
    # Keep every `size`-th sample starting at `rank`.
    images, labels = images[rank::size], labels[rank::size]
    # Add the trailing channel axis, convert to float32, then scale by 1/255.
    pixels = images.reshape(images.shape[0], 28, 28, 1).astype('float32')
    pixels /= 255
    return pixels, keras.utils.to_categorical(labels, num_classes)

  # The cache name embeds the rank, presumably so concurrent workers do not
  # share one download file -- TODO confirm.
  train_split, test_split = keras.datasets.mnist.load_data('MNIST-data-%d' % rank)
  return _prepare(*train_split), _prepare(*test_split)

In [0]:
def get_model(num_classes):
  """Build the (uncompiled) MNIST convolutional classifier.

  Args:
    num_classes: Number of units in the final softmax layer.

  Returns:
    A Keras Sequential model taking (28, 28, 1) inputs: two 3x3 convolutions,
    2x2 max-pooling and dropout, followed by a 128-unit dense layer, dropout
    and a softmax output.
  """
  from tensorflow.keras import layers
  from tensorflow.keras import models

  # Convolutional feature extractor.
  feature_layers = [
      layers.Conv2D(32, kernel_size=(3, 3), activation='relu',
                    input_shape=(28, 28, 1)),
      layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
      layers.MaxPooling2D(pool_size=(2, 2)),
      layers.Dropout(0.25),
  ]
  # Dense classifier on the flattened feature maps.
  head_layers = [
      layers.Flatten(),
      layers.Dense(128, activation='relu'),
      layers.Dropout(0.5),
      layers.Dense(num_classes, activation='softmax'),
  ]
  return models.Sequential(feature_layers + head_layers)

In [0]:
# Notebook-level training parameters.
# `train_hvd` reads `num_classes` and `epochs` from these globals rather than
# taking them as arguments.
num_classes = 10
epochs = 2
batch_size = 128

def train(learning_rate=0.1, batch_size=64, epochs=10, num_classes=10):
  """Train the MNIST CNN on a single node and return the fitted model.

  Args:
    learning_rate: Learning rate for the Adadelta optimizer.
    batch_size: Mini-batch size passed to `model.fit`.
    epochs: Number of passes over the training data.
    num_classes: Number of target classes (width of the softmax output).

  Returns:
    The trained Keras model.
  """
  from tensorflow import keras

  (x_train, y_train), (x_test, y_test) = get_dataset(num_classes)
  model = get_model(num_classes)

  # `lr` is only a deprecated alias; `learning_rate` is the supported keyword
  # and keeps working on Keras releases that no longer accept `lr`.
  optimizer = keras.optimizers.Adadelta(learning_rate=learning_rate)

  model.compile(optimizer=optimizer,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

  # The test split doubles as validation data, reported once per epoch.
  model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            verbose=2,
            validation_data=(x_test, y_test))
  return model

In [0]:
# Run the single-node baseline; the fitted model stays in `model` so the next
# cell can evaluate it.
model = train(learning_rate=0.1, batch_size=64, epochs=10, num_classes=10)

In [0]:
# Reload MNIST with the default rank=0/size=1 (i.e. the whole test split) and
# score the single-node model on it.
_, (x_test, y_test) = get_dataset(num_classes)
loss, accuracy = model.evaluate(x_test, y_test, batch_size=128)
print("loss:", loss)
print("accuracy:", accuracy)

In [0]:
import os
import time
# Create a fresh, timestamp-named directory for this run's checkpoints.
# The path already ends with '/'.
# NOTE(review): presumably /dbfs is used so every worker can reach the same
# location -- confirm for the target cluster.
checkpoint_dir = '/dbfs/ml/MNISTDemo/train/{}/'.format(time.time())
os.makedirs(checkpoint_dir)
print(checkpoint_dir)

The following cell shows how to modify the single-node code of the previously defined `train()` function to take advantage of distributed training.

In [0]:
def train_hvd(learning_rate, batch_size, checkpoint_path):
  """Horovod entry point: train the MNIST CNN data-parallel across processes.

  Executed once per Horovod process. `num_classes` and `epochs` are read from
  the notebook-level globals of the same name.

  Args:
    learning_rate: Base learning rate; multiplied by `hvd.size()` before it is
      handed to Adadelta.
    batch_size: Mini-batch size passed to `model.fit` in each process.
    checkpoint_path: Path (may contain `{epoch}`) given to `ModelCheckpoint`;
      only rank 0 writes checkpoints.

  Returns:
    The result of `model.evaluate` on this process's test shard (loss followed
    by accuracy), so callers such as a Hyperopt objective can read the metrics.
    NOTE(review): HorovodRunner.run is expected to hand the rank-0 return value
    back to the driver -- confirm against the sparkdl documentation.
  """
  import tensorflow as tf
  from tensorflow import keras
  import horovod.tensorflow.keras as hvd

  hvd.init()

  # Pin each process to one GPU, selected by its local rank, and let TensorFlow
  # grow GPU memory on demand.
  gpus = tf.config.experimental.list_physical_devices('GPU')
  for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
  if gpus:
    tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')

  # Each process loads only its own shard of the train and test data.
  (x_train, y_train), (x_test, y_test) = get_dataset(num_classes, hvd.rank(), hvd.size())
  model = get_model(num_classes)

  # Scale the learning rate by the number of processes. `learning_rate` is the
  # supported keyword; `lr` is only a deprecated alias.
  optimizer = keras.optimizers.Adadelta(learning_rate=learning_rate * hvd.size())

  # Wrap the optimizer so Horovod handles the cross-process gradient exchange.
  optimizer = hvd.DistributedOptimizer(optimizer)

  model.compile(optimizer=optimizer,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

  callbacks = [
      # Start every process from rank 0's initial variables.
      hvd.callbacks.BroadcastGlobalVariablesCallback(0),
  ]

  # Only rank 0 saves checkpoints, so processes never write the same files.
  if hvd.rank() == 0:
      callbacks.append(keras.callbacks.ModelCheckpoint(checkpoint_path, save_weights_only = True))

  model.fit(x_train, y_train,
            batch_size=batch_size,
            callbacks=callbacks,
            epochs=epochs,
            verbose=2,
            validation_data=(x_test, y_test))

  # Previously nothing was returned, so callers unpacking (loss, accuracy)
  # from the run failed.
  return model.evaluate(x_test, y_test, verbose=0)

In [0]:
from sparkdl import HorovodRunner

# Keras' ModelCheckpoint substitutes `{epoch}`, giving one checkpoint per epoch.
checkpoint_path = checkpoint_dir + '/checkpoint-{epoch}.ckpt'
learning_rate = 0.1
hr = HorovodRunner(np=2)
# train_hvd has no default for `batch_size`, so it must be passed explicitly;
# omitting it raises TypeError. Use the notebook-level `batch_size` setting.
hr.run(train_hvd,
       learning_rate=learning_rate,
       batch_size=batch_size,
       checkpoint_path=checkpoint_path)

In [0]:
import tensorflow as tf

# Rebuild the architecture on the driver and restore the weights saved by
# rank 0 during the distributed run.
hvd_model = get_model(num_classes)
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
hvd_model.compile(optimizer=tf.keras.optimizers.Adadelta(learning_rate=learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
# latest_checkpoint returns None when the directory holds no checkpoint; fail
# with a clear message instead of passing None to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(os.path.dirname(checkpoint_path))
if latest_checkpoint is None:
  raise FileNotFoundError(
      'No checkpoint found in {}'.format(os.path.dirname(checkpoint_path)))
hvd_model.load_weights(latest_checkpoint)

In [0]:
# Evaluate the model restored from the Horovod checkpoint on the full test
# split (get_dataset defaults to rank=0, size=1).
num_classes=10
_, (x_test, y_test) = get_dataset(num_classes)
loss, accuracy = hvd_model.evaluate(x_test, y_test, batch_size=128)
print("loaded model loss and accuracy:", loss, accuracy)

In [0]:
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
from sparkdl import HorovodRunner
def wrap_train(params):
  """Hyperopt objective: run one distributed training job and report its loss.

  Args:
    params: Dict sampled from the search space; must contain 'learning_rate'
      and 'batch_size'.

  Returns:
    Dict with the trial's 'loss' and 'status' (STATUS_OK), as Hyperopt expects.

  Raises:
    ValueError: If the training job returns no metrics.
  """
  hr = HorovodRunner(np=2)
  # train_hvd's third parameter is `checkpoint_path`; passing `checkpoint_dir`
  # raised TypeError. Reuse the notebook-level checkpoint path template.
  metrics = hr.run(train_hvd,
                   learning_rate=params['learning_rate'],
                   batch_size=params['batch_size'],
                   checkpoint_path=checkpoint_path)
  # The unpacking below only works if train_hvd returns (loss, accuracy).
  if metrics is None:
    raise ValueError('train_hvd returned no metrics; expected (loss, accuracy)')
  loss, _accuracy = metrics
  return {'loss': loss, 'status': STATUS_OK}

In [0]:
import numpy as np
# Hyperopt search space:
#   learning_rate -- log-uniform between 1e-4 and 1e-1
#   batch_size    -- one of 32, 64 or 128
space = dict(
  learning_rate=hp.loguniform('learning_rate', np.log(1e-4), np.log(1e-1)),
  batch_size=hp.choice('batch_size', [32, 64, 128]),
)

In [0]:
# Tree-structured Parzen Estimator search.
algo=tpe.suggest

# fmin calls the objective with one sampled-parameters dict and expects a loss
# back. `train` takes keyword hyperparameters and returns a model, so the
# objective must be `wrap_train`, which accepts the dict and returns
# {'loss', 'status'}.
best_param = fmin(
  fn=wrap_train,
  space=space,
  algo=algo,
  max_evals=8,
  return_argmin=False,
)

In [0]:
# Show the best hyperparameters found by fmin.
print(best_param)

In [0]:
%pip install spark-tensorflow-distributor

In [0]:
import tensorflow as tf
# Worker count used to scale the local GPU count up to a cluster-wide total.
# NOTE(review): assumes every worker has the same number of GPUs as this
# process sees -- confirm for the target cluster.
NUM_WORKERS = 2
CLUSTER_GPUS = len(tf.config.list_logical_devices('GPU')) * NUM_WORKERS
# Train on GPUs only when at least one was detected.
USE_GPU = CLUSTER_GPUS > 0

In [0]:
def train(BUFFER_SIZE = 5000, BATCH_SIZE = 32, learning_rate=0.005):
  """Training function for MirroredStrategyRunner: fit a small MNIST CNN.

  NOTE(review): this reuses the name `train`, so from this cell onward it
  replaces the Keras/Adadelta `train` defined earlier in the notebook.

  Args:
    BUFFER_SIZE: Shuffle buffer size for the input pipeline.
    BATCH_SIZE: Batch size for the input pipeline.
    learning_rate: Learning rate of the SGD optimizer.

  Returns:
    None. The model is fitted for 3 epochs of 5 steps each.
  """
  import tensorflow as tf
  import uuid

  def _make_training_dataset():
    # A random file name is used for the local MNIST cache on every call.
    cache_name = str(uuid.uuid4())+'mnist.npz'
    (images, labels), _ = tf.keras.datasets.mnist.load_data(path=cache_name)
    # Add a channel axis and scale pixels by 1/255; labels stay integer ids.
    features = tf.cast(images[..., tf.newaxis] / 255.0, tf.float32)
    targets = tf.cast(labels, tf.int64)
    samples = tf.data.Dataset.from_tensor_slices((features, targets))
    return samples.repeat().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

  def _make_compiled_model():
    network = tf.keras.Sequential([
      tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
      tf.keras.layers.MaxPooling2D(),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(128, activation='relu'),
      tf.keras.layers.Dense(10, activation='softmax'),
    ])
    network.compile(
      loss=tf.keras.losses.sparse_categorical_crossentropy,
      optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
      metrics=['accuracy'],
    )
    return network

  train_datasets = _make_training_dataset()
  worker_model = _make_compiled_model()

  # Use the DATA auto-shard policy for the in-memory dataset.
  shard_options = tf.data.Options()
  shard_options.experimental_distribute.auto_shard_policy = (
      tf.data.experimental.AutoShardPolicy.DATA)
  worker_model.fit(x=train_datasets.with_options(shard_options),
                   epochs=3,
                   steps_per_epoch=5)

In [0]:
from spark_tensorflow_distributor import MirroredStrategyRunner
# Local-mode run with a single slot: a quick check of the training function
# before launching it with more slots.
runner = MirroredStrategyRunner(num_slots=1, local_mode=True, use_gpu=USE_GPU)
runner.run(train)

In [0]:
# One slot per detected GPU; for CPU-only training fall back to 4 slots.
NUM_SLOTS = CLUSTER_GPUS if USE_GPU else 4
# Pass the computed value: the hard-coded 4 ignored NUM_SLOTS and requested
# the wrong number of slots on GPU clusters that do not have exactly 4 GPUs.
runner = MirroredStrategyRunner(num_slots=NUM_SLOTS, use_gpu=USE_GPU)
runner.run(train)

In [0]:
def train_custom_strategy():
  """Training function that builds its own MultiWorkerMirroredStrategy.

  Intended for MirroredStrategyRunner with `use_custom_strategy=True`: the
  strategy is created here, and the dataset, model and fit call all run inside
  its scope.

  Returns:
    None. The model is fitted for 3 epochs of 5 steps each.
  """
  import tensorflow as tf
  # NOTE(review): NCCL is requested unconditionally -- confirm the behaviour
  # on CPU-only clusters.
  strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
    tf.distribute.experimental.CollectiveCommunication.NCCL)
  with strategy.scope():
    import uuid
    BUFFER_SIZE = 10000
    BATCH_SIZE = 32
    def make_datasets():
      # A random file name is used for the local MNIST cache on every call.
      (mnist_images, mnist_labels), _ = tf.keras.datasets.mnist.load_data(path=str(uuid.uuid4())+'mnist.npz')
      # Add a channel axis and scale pixels by 1/255; labels stay integer ids.
      dataset = tf.data.Dataset.from_tensor_slices((
          tf.cast(mnist_images[..., tf.newaxis] / 255.0,
                  tf.float32),
          tf.cast(mnist_labels, tf.int64))
      )
      dataset = dataset.repeat().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
      return dataset

    def get_model():
      model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        # 128 hidden units, matching the other models in this notebook; the
        # previous value of 138 looked like a typo.
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
      ])
      model.compile(
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
        metrics=['accuracy'],
      )
      return model

    train_datasets = make_datasets()
    worker_model = get_model()
    # Use the DATA auto-shard policy for the in-memory dataset.
    options = tf.data.Options()
    options.experimental_distribute.auto_shard_policy = \
        tf.data.experimental.AutoShardPolicy.DATA
    train_datasets = train_datasets.with_options(options)
    worker_model.fit(x=train_datasets, epochs=3, steps_per_epoch=5)

In [0]:
# Local-mode, single-slot run of the training function that creates its own
# distribution strategy (hence use_custom_strategy=True).
runner = MirroredStrategyRunner(num_slots=1,
                                use_custom_strategy=True,
                                local_mode=True,
                                use_gpu=USE_GPU)
runner.run(train_custom_strategy)