## Stage 1: Installing dependencies and setting up the environment

In [0]:
!pip install tensorflow-gpu==2.0.0.alpha0



## Stage 2: Importing project dependencies

In [0]:
import time
import numpy as np
import tensorflow as tf

In [0]:
# Display the version of the TensorFlow package that was actually imported.
tf.__version__

'2.0.0-alpha0'

## Stage 3: Dataset preprocessing

### Loading the MNIST dataset

In [0]:
# Fetch MNIST through the Keras dataset helper; it returns a
# (features, labels) pair for the training split and another for the test split.
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

### Image normalization

In [0]:
# Scale pixel intensities by 1/255. The division is applied only while an
# array still holds raw integer pixels, so re-running this cell does not
# divide the already-normalized data a second time.
if np.issubdtype(X_train.dtype, np.integer):
  X_train = X_train / 255.
if np.issubdtype(X_test.dtype, np.integer):
  X_test = X_test / 255.

In [0]:
# Inspect the training array's shape before it is flattened.
X_train.shape

(60000, 28, 28)

### Dataset reshaping

In [0]:
# Flatten every image into a single row of 28*28 values so it matches
# the input_shape=(784,) expected by the first Dense layer.
X_train = np.reshape(X_train, (-1, 28 * 28))
X_test = np.reshape(X_test, (-1, 28 * 28))

In [0]:
# Inspect the training array's shape after flattening.
X_train.shape

(60000, 784)

## Stage 4: Distributed Training

### Defining a normal (non-distributed) model

In [0]:
# Start an empty Sequential model; layers are appended in the following cells.
model_normal = tf.keras.models.Sequential()

In [0]:
# Hidden layer: 128 fully connected ReLU units fed by the 784-value input rows.
model_normal.add(
    tf.keras.layers.Dense(
        units=128,
        activation='relu',
        input_shape=(784,),
    )
)

In [0]:
# Dropout layer configured with rate=0.2.
model_normal.add(tf.keras.layers.Dropout(rate=0.2))

In [0]:
# Output layer: 10 units with a softmax activation.
model_normal.add(
    tf.keras.layers.Dense(
        units=10,
        activation='softmax',
    )
)

In [0]:
# Configure training: Adam optimizer, with the sparse categorical
# cross-entropy loss and the matching sparse accuracy metric.
model_normal.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

### Defining the Distributed Strategy

In [0]:
# Create the distribution strategy; the distributed model is later built
# and compiled inside this strategy's scope().
distribute = tf.distribute.MirroredStrategy()

### Defining a distributed model

In [0]:
# Build and compile the same architecture as model_normal, but inside the
# strategy scope so the model is created under the distribution strategy.
with distribute.scope():
  model_distributed = tf.keras.models.Sequential([
      tf.keras.layers.Dense(units=128, activation='relu', input_shape=(784,)),
      tf.keras.layers.Dropout(rate=0.2),
      tf.keras.layers.Dense(units=10, activation='softmax'),
  ])
  model_distributed.compile(
      optimizer='adam',
      loss='sparse_categorical_crossentropy',
      metrics=['sparse_categorical_accuracy'],
  )

### Speed comparison between normal and distributed training

In [0]:
# Time the whole distributed fit() call. perf_counter() is a monotonic,
# high-resolution clock, so the measured interval cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()
model_distributed.fit(X_train, y_train, epochs=10, batch_size=25)
print("Distributed training took: {}".format(time.perf_counter() - start_time))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Distributed training took: 173.06630730628967


In [0]:
# Time the whole non-distributed fit() call. perf_counter() is a monotonic,
# high-resolution clock, so the measured interval cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()
model_normal.fit(X_train, y_train, epochs=10, batch_size=25)
print("Normal training took: {}".format(time.perf_counter() - start_time))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Normal training took: 159.65520215034485
