## Paso 1: Importar las dependencias del proyecto

In [1]:
import time
import numpy as np
import tensorflow as tf

In [2]:
# Display the installed TensorFlow version so the recorded outputs can be tied to it.
tf.__version__

'2.13.0'

## Paso 2: Preprocesado del dataset

### Cargar el dataset MNIST

In [3]:
# Fetch MNIST through the Keras dataset helper; the call returns a
# (train, test) pair, each an (images, labels) tuple.
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

### Normalización de las imágenes

In [4]:
# Rescale pixel values by 1/255 (true division yields floating-point arrays).
# NOTE: this rebinds X_train / X_test, so re-running the cell without
# reloading the data would scale them a second time.
X_train, X_test = X_train / 255., X_test / 255.

In [5]:
# Inspect the training array's shape before it is flattened.
X_train.shape

(60000, 28, 28)

### Redimensionar el dataset

In [6]:
# Flatten every image into one row of 28*28 values; the -1 lets NumPy infer
# the number of samples from the array's total size.
flat_shape = (-1, 28 * 28)
X_train = X_train.reshape(flat_shape)
X_test = X_test.reshape(flat_shape)

In [7]:
# Check the training array's shape after the reshape.
X_train.shape

(60000, 784)

## Paso 3: Entrenamiento distribuido

### Definir un modelo normal (no distribuido)

In [8]:
# Start from an empty layer stack; layers are appended in the following cells.
model_normal = tf.keras.Sequential()

In [9]:
# Hidden layer: 128 ReLU units fed by the flattened 784-value input.
model_normal.add(
    tf.keras.layers.Dense(
        units=128,
        activation='relu',
        input_shape=(784,),
    )
)

In [10]:
# Dropout layer with rate 0.2, appended after the hidden layer.
model_normal.add(
    tf.keras.layers.Dropout(rate=0.2)
)

In [11]:
# Output layer: 10 units with softmax activation.
model_normal.add(
    tf.keras.layers.Dense(
        units=10,
        activation='softmax',
    )
)

In [12]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and sparse categorical accuracy as the reported metric.
model_normal.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

### Definir una estrategia distribuida

In [13]:
# Distribution strategy under whose scope the distributed model is defined.
distribute = tf.distribute.MirroredStrategy()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


### Definir un modelo distribuido

In [14]:
# Build and compile the model inside the strategy scope, using the same
# layers and compile settings as the non-distributed model.
with distribute.scope():
    model_distributed = tf.keras.models.Sequential([
        tf.keras.layers.Dense(units=128, activation='relu', input_shape=(784,)),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(units=10, activation='softmax'),
    ])
    model_distributed.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'],
    )

### Comparar la velocidad de entrenamiento normal vs distribuida

In [15]:
# Time the distributed model's training run (20 epochs, batch size 20).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() follows the wall clock and can jump if the
# system time is adjusted while training is in progress.
start_time = time.perf_counter()
model_distributed.fit(X_train, y_train, epochs=20, batch_size=20)
print("El entrenamiento distribuido ha tardado: {}".format(time.perf_counter() - start_time))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
El entrenamiento distribuido ha tardado: 161.62584233283997


In [16]:
# Time the non-distributed model's training run (20 epochs, batch size 20).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() follows the wall clock and can jump if the
# system time is adjusted while training is in progress.
start_time = time.perf_counter()
model_normal.fit(X_train, y_train, epochs=20, batch_size=20)
print("El entrenamiento normal ha tardado: {}".format(time.perf_counter() - start_time))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
El entrenamiento normal ha tardado: 123.94374561309814


In [17]:
# Score both models on the test split and report each accuracy.
# The same two names are reused for both evaluations, so after this cell
# test_loss / test_accuracy hold the non-distributed model's results.
distributed_report = "Precisión del conjunto de test Distribuido: {}"
normal_report = "Precisión del conjunto de test Normal: {}"

test_loss, test_accuracy = model_distributed.evaluate(X_test, y_test)
print(distributed_report.format(test_accuracy))

test_loss, test_accuracy = model_normal.evaluate(X_test, y_test)
print(normal_report.format(test_accuracy))

Precisión del conjunto de test Distribuido: 0.9793999791145325
Precisión del conjunto de test Normal: 0.9797000288963318
