## Step 1 Install Dependencies

In [0]:
!pip install -q tensorflow-gpu==2.0.0-beta1

## Step 2 Imports

In [2]:
import time
import numpy as np
import tensorflow as tf
# Display the TensorFlow version; it should match the release pinned in the install step.
tf.__version__

'2.0.0-beta1'

## Step 3 Dataset Preprocessing

### Loading the MNIST Dataset

In [0]:
# Load MNIST as (images, labels) pairs for the training and test splits.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

### Image Normalization

In [0]:
# Divide both image arrays by 255 so pixel values are scaled down from the 0-255 range.
X_train, X_test = (images / 255. for images in (X_train, X_test))

In [5]:
# Inspect the training array's shape before flattening.
X_train.shape

(60000, 28, 28)

### Dataset reshaping

In [0]:
# Flatten every 28x28 image into a 784-element row, matching the input_shape=(784,)
# expected by the first Dense layer.
X_train, X_test = (images.reshape(-1, 28 * 28) for images in (X_train, X_test))

In [7]:
# Confirm the flattened shape: one row of 784 values per image.
X_train.shape

(60000, 784)

## Step 4 Distributed Training

### Defining a non-distributed model

In [0]:
# Empty Sequential container; layers are appended to it in the following cells.
model_normal = tf.keras.Sequential()

### Building the non-distributed model

In [0]:
# Hidden layer: 128 ReLU units fed by the flattened 784-value images.
model_normal.add(
    tf.keras.layers.Dense(
        units=128,
        activation='relu',
        input_shape=(784,),
    )
)

In [0]:
# Dropout layer with a 0.2 rate, placed after the hidden layer.
model_normal.add(
    tf.keras.layers.Dropout(rate=0.2)
)

In [0]:
# Output layer: the number of units must equal the number of classes in the
# dataset (10 here); softmax is used as the activation.
model_normal.add(
    tf.keras.layers.Dense(
        units=10,
        activation='softmax',
    )
)

### Compile the model

In [12]:
# Configure the optimizer, loss and metric, then print the layer/parameter summary.
model_normal.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)
model_normal.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


### Defining the Distributed Strategy

In [0]:
# Strategy object whose scope() is used below to build and compile the distributed model.
distribute = tf.distribute.MirroredStrategy()

### Defining a Distributed Model

In [0]:
# Same architecture and compile settings as model_normal, but the model is
# created and compiled inside the distribution strategy's scope.
with distribute.scope():
  model_distributed = tf.keras.models.Sequential([
      tf.keras.layers.Dense(units=128, activation='relu', input_shape=(784,)),
      tf.keras.layers.Dropout(rate=0.2),
      tf.keras.layers.Dense(units=10, activation='softmax'),
  ])
  model_distributed.compile(
      optimizer='adam',
      loss='sparse_categorical_crossentropy',
      metrics=['sparse_categorical_accuracy'],
  )

## Speed comparison for proof of concept

In [15]:
# Time 10 epochs of training for the distributed model.
# time.perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
model_distributed.fit(X_train, y_train, epochs=10, batch_size=25)
print(f"Distributed training took: {time.perf_counter()-start_time}")

Train on 2400 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Distributed training took: 110.29052472114563


In [16]:
# Time 10 epochs of training for the non-distributed model, using the same
# epochs and batch size as the distributed run so the two timings are comparable.
# time.perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
model_normal.fit(X_train, y_train, epochs=10, batch_size=25)
# The printed label previously read "Nomal"; the typo is corrected here.
print(f"Normal training took: {time.perf_counter()-start_time}")

W0813 17:28:11.423160 140708692191104 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Nomal training took: 123.63989973068237
