# Train Model on Distributed Cluster

## Define Cluster Spec

In [None]:
import tensorflow as tf

# Cluster layout: two tasks in the "worker" job and one task in the "ps" job,
# all addressed on localhost with distinct ports.
cluster = tf.train.ClusterSpec(
    {
        "worker": ["localhost:2222", "localhost:2223"],
        "ps": ["localhost:2224"],
    }
)

## Start Server "Worker Task 0" (localhost:2222)

In [None]:
# Server for task 0 of the "worker" job, as listed in the cluster spec.
worker0 = tf.train.Server(
    cluster,
    job_name="worker",
    task_index=0,
)

print(worker0)

## Start Server "Worker Task 1" (localhost:2223)

In [None]:
# Server for task 1 of the "worker" job, as listed in the cluster spec.
worker1 = tf.train.Server(
    cluster,
    job_name="worker",
    task_index=1,
)

print(worker1)

## Start Server "Parameter Server Task 0" (localhost:2224)

In [None]:
# Server for task 0 of the "ps" (parameter server) job.
ps0 = tf.train.Server(
    cluster,
    job_name="ps",
    task_index=0,
)
# ps0.join() is deliberately not called here.
# NOTE(review): presumably because join() would block this cell — confirm.
print(ps0)

## Define a Computationally-intensive TensorFlow Graph

In [None]:
import tensorflow as tf

# Exponent handed to matpow() by the execution cells further down.
n = 2
# c1 and c2 are assigned by every execution cell before they are read, so no
# placeholder tf.Variable objects are created for them here (they would only
# add unused, uninitialised variables to the default graph).

def matpow(M, n):
    """Build the graph op for ``M`` raised to the ``n``-th matrix power.

    The product is assembled recursively as ``M @ matpow(M, n - 1)``, which
    adds ``n - 1`` ``tf.matmul`` ops to the graph.

    Args:
        M: Operand accepted by ``tf.matmul`` on both sides.
        n: Integer exponent. Values below 1 are treated as 1, i.e. ``M`` is
            returned unchanged.

    Returns:
        ``M`` itself when ``n <= 1``, otherwise the result of the chained
        ``tf.matmul`` calls.
    """
    # The base case is n == 1 (M**1 == M). Stopping at n < 1 instead would
    # perform one multiplication too many and yield M**(n + 1).
    if n <= 1:
        return M
    return tf.matmul(M, matpow(M, n - 1))

## Execute Graph on Manually-Assigned Devices 

### All CPU Devices
Note the execution time.

In [None]:
import datetime

# Build one matrix-power chain per worker task, each pinned to that task's CPU.
with tf.device("/job:worker/task:0/cpu:0"):
    A = tf.random_normal(shape=[10000, 10000])
    print(A)
    c1 = matpow(A, n)
    print(c1)

with tf.device("/job:worker/task:1/cpu:0"):
    B = tf.random_normal(shape=[10000, 10000])
    print(B)
    c2 = matpow(B, n)
    print(c2)

with tf.Session("grpc://127.0.0.1:2222") as sess:
    # Named `total` rather than `sum` so the built-in sum() is not shadowed
    # for the rest of the notebook session.
    total = c1 + c2
    # The timer starts after the graph is built, so only sess.run is measured.
    start_time = datetime.datetime.now()
    print(sess.run(total))
    print("Execution time: "
          + str(datetime.datetime.now() - start_time))
          

### CPU and GPU
Note the execution time.

In [None]:
# Worker task 0 builds its chain on GPU 0; worker task 1 stays on the CPU.
with tf.device("/job:worker/task:0/gpu:0"):
    A = tf.random_normal(shape=[10000, 10000])
    print(A)
    c1 = matpow(A, n)
    print(c1)

with tf.device("/job:worker/task:1/cpu:0"):
    B = tf.random_normal(shape=[10000, 10000])
    print(B)
    c2 = matpow(B, n)
    print(c2)

with tf.Session("grpc://127.0.0.1:2222") as sess:
    # Named `total` rather than `sum` so the built-in sum() is not shadowed
    # for the rest of the notebook session.
    total = c1 + c2
    # The timer starts after the graph is built, so only sess.run is measured.
    start_time = datetime.datetime.now()
    print(sess.run(total))
    print("Execution time: "
          + str(datetime.datetime.now() - start_time))

### All GPU Devices
Note the execution time.

In [None]:
# Both worker tasks build their chain on their own GPU 0.
with tf.device("/job:worker/task:0/gpu:0"):
    A = tf.random_normal(shape=[10000, 10000])
    print(A)
    c1 = matpow(A, n)
    print(c1)

with tf.device("/job:worker/task:1/gpu:0"):
    B = tf.random_normal(shape=[10000, 10000])
    print(B)
    c2 = matpow(B, n)
    print(c2)

with tf.Session("grpc://127.0.0.1:2222") as sess:
    # Named `total` rather than `sum` so the built-in sum() is not shadowed
    # for the rest of the notebook session.
    total = c1 + c2
    # The timer starts after the graph is built, so only sess.run is measured.
    start_time = datetime.datetime.now()
    print(sess.run(total))
    print("Execution time: "
          + str(datetime.datetime.now() - start_time))

## Execute Graph with Auto-Assigned Devices 
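`tf.train.replica_device_setter()` assigns variables to the parameter-server tasks in round-robin order by default and places every other op on the given `worker_device`. Note the execution time.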

In [None]:
# Let replica_device_setter choose the device for each op, once per worker task.
# NOTE(review): no tf.Variable is created in this cell, so the parameter
# server presumably receives no ops here — confirm against the placement log.
with tf.device(tf.train.replica_device_setter(worker_device="/job:worker/task:0",
                                              cluster=cluster)):
    A = tf.random_normal(shape=[10000, 10000])
    print(A)
    c1 = matpow(A, n)
    print(c1)

with tf.device(tf.train.replica_device_setter(worker_device="/job:worker/task:1",
                                              cluster=cluster)):
    B = tf.random_normal(shape=[10000, 10000])
    print(B)
    c2 = matpow(B, n)
    print(c2)

with tf.Session("grpc://127.0.0.1:2222") as sess:
    # Named `total` rather than `sum` so the built-in sum() is not shadowed
    # for the rest of the notebook session.
    total = c1 + c2
    # The timer starts after the graph is built, so only sess.run is measured.
    start_time = datetime.datetime.now()
    print(sess.run(total))
    print("Multi node computation time: "
          + str(datetime.datetime.now() - start_time))

## Clean Up

In [None]:
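# TODO: shut down the in-process servers once the experiments are finished.
# NOTE(review): confirm that tf.train.Server exposes stop() in the installed
# TensorFlow version before enabling the calls below.
#worker0.stop()
#worker1.stop()
#ps0.stop()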