In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import variable_scope as vs
from threading import Thread
from multiprocessing import Queue
from time import time

In [None]:
def basic_graph():
    """Build a single sigmoid fully-connected layer in the current default graph.

    Creates a float32 placeholder of shape [32, 32] and, under the variable
    scope 'full_con', a weight matrix 'W_a' ([32, 32]) and bias 'b_a' ([32]),
    both initialised from a normal distribution with stddev 0.1.

    Returns:
        (placeholder, output) -- the input placeholder and the tensor
        sigmoid(input . W_a + b_a).
    """
    inputs = tf.placeholder(tf.float32, [32, 32], name='batch_data_matrix')
    with vs.variable_scope('full_con') as layer_scope:
        hidden = inputs
        # The loop runs exactly once; on any later pass the scope would be
        # re-entered with reuse=True so the same W_a / b_a are fetched again.
        for layer_idx in range(1):
            reuse_flag = (layer_idx > 0) or None
            with vs.variable_scope(layer_scope, reuse=reuse_flag):
                weights = tf.get_variable(
                    'W_a', initializer=tf.random_normal([32, 32], stddev=0.1))
                bias = tf.get_variable(
                    'b_a', initializer=tf.random_normal([32], stddev=0.1))
                pre_activation = tf.matmul(hidden, weights) + bias
                hidden = tf.sigmoid(pre_activation)
    return inputs, hidden

In [None]:
def twin_compute(data):
    """Evaluate two copies of the basic layer in one graph with one run call.

    One copy is pinned to '/cpu:0' under variable scope 'graph_1', the other
    to '/gpu:0' under 'graph_2'. Both are fed the same `data` and fetched in a
    single `sess.run`.

    Args:
        data: array fed to both [32, 32] float32 placeholders.

    Returns:
        dict with keys 'g1_res' (CPU copy) and 'g2_res' (GPU copy).
    """
    shared_graph = tf.Graph()
    with shared_graph.as_default():
        with tf.device('/cpu:0'):
            with vs.variable_scope('graph_1'):
                cpu_input, cpu_output = basic_graph()
        with tf.device('/gpu:0'):
            with vs.variable_scope('graph_2'):
                gpu_input, gpu_output = basic_graph()

    with tf.Session(graph=shared_graph) as sess:
        sess.run(tf.initialize_all_variables())
        feed = {cpu_input: data, gpu_input: data}
        cpu_result, gpu_result = sess.run([cpu_output, gpu_output], feed)

    return {'g1_res': cpu_result, 'g2_res': gpu_result}

def sequential_compute(data):
    """Evaluate the basic layer twice, in separate graphs and sessions.

    Both graphs are built first ('/cpu:0', then '/gpu:0'); afterwards each one
    is initialised and run in its own session, one after the other.

    Args:
        data: array fed to each graph's [32, 32] float32 placeholder.

    Returns:
        dict with keys 'g1_res' (CPU graph) and 'g2_res' (GPU graph).
    """
    cpu_graph = tf.Graph()
    with cpu_graph.as_default():
        with tf.device('/cpu:0'):
            cpu_input, cpu_output = basic_graph()

    gpu_graph = tf.Graph()
    with gpu_graph.as_default():
        with tf.device('/gpu:0'):
            gpu_input, gpu_output = basic_graph()

    # Run the CPU graph first, then the GPU graph, each in a fresh session.
    plan = (
        ('g1_res', cpu_graph, cpu_input, cpu_output),
        ('g2_res', gpu_graph, gpu_input, gpu_output),
    )
    results = {}
    for key, graph, input_op, output_op in plan:
        with tf.Session(graph=graph) as sess:
            sess.run(tf.initialize_all_variables())
            results[key] = sess.run(output_op, {input_op: data})
    return results

def single_compute(data):
    """Evaluate one copy of the basic layer pinned to '/gpu:0'.

    Args:
        data: array fed to the [32, 32] float32 placeholder.

    Returns:
        The evaluated activation for `data`.
    """
    gpu_graph = tf.Graph()
    with gpu_graph.as_default():
        with tf.device('/gpu:0'):
            inputs, outputs = basic_graph()

    with tf.Session(graph=gpu_graph) as sess:
        sess.run(tf.initialize_all_variables())
        result = sess.run(outputs, {inputs: data})
    return result
    
def thread_compute(data):
    """Evaluate two copies of the basic layer from two Python threads.

    Both copies live in one private graph ('/cpu:0' under scope 'graph_1',
    '/gpu:0' under scope 'graph_2') and share a single session; each thread
    issues its own `sess.run` for one of the copies.

    The ops must be created inside `graph.as_default()`. Otherwise they are
    added to the process-wide default graph while the session is bound to the
    separate, empty `graph`, and `sess.run` cannot resolve the tensors.

    Args:
        data: array fed to both [32, 32] float32 placeholders.

    Returns:
        dict with keys 'job_1' (CPU copy) and 'job_2' (GPU copy).

    Raises:
        Re-raises the first exception recorded by a worker thread.
    """
    graph = tf.Graph()
    with graph.as_default():
        with tf.device('/cpu:0'), vs.variable_scope('graph_1'):
            batch_1, activation_1 = basic_graph()
        with tf.device('/gpu:0'), vs.variable_scope('graph_2'):
            batch_2, activation_2 = basic_graph()
        # Create the initialiser explicitly in the same graph as the variables.
        init_op = tf.initialize_all_variables()

    # Workers are threads of this process, so results can be collected in
    # plain containers; each worker writes a distinct key.
    results = {}
    failures = []

    def worker(sess, data_op, res_op, data_in, name):
        # An exception raised in a Thread target never reaches the caller of
        # join(); record it so the main thread can surface it instead of
        # waiting for a result that will not arrive.
        try:
            results[name] = sess.run(res_op, {data_op: data_in})
        except Exception as exc:
            failures.append(exc)

    with tf.Session(graph=graph) as sess:
        sess.run(init_op)
        jobs = [
            Thread(target=worker, args=(sess, batch_1, activation_1, data, 'job_1')),
            Thread(target=worker, args=(sess, batch_2, activation_2, data, 'job_2')),
        ]

        for j in jobs:
            j.start()

        # Join inside the session block so the session stays open until both
        # workers have finished.
        for j in jobs:
            j.join()

    if failures:
        raise failures[0]

    return results

In [None]:
# Shared benchmark input: a 32x32 float32 matrix of standard-normal samples,
# matching the placeholder shape used by basic_graph().
data = np.random.standard_normal((32, 32)).astype(np.float32)

In [None]:
# Wall-clock time for one full twin_compute() call (graph construction,
# variable initialisation and the run itself are all included).
t = time()
res = twin_compute(data)
# Parenthesised single-argument print works on both Python 2 and Python 3.
print('Twin computation took %.6fs' % (time() - t))

In [None]:
# Wall-clock time for one full sequential_compute() call (building both
# graphs, initialising and running each in its own session).
t = time()
res = sequential_compute(data)
# Parenthesised single-argument print works on both Python 2 and Python 3.
print('Sequential computation took %.6fs' % (time() - t))

In [None]:
# Wall-clock time for one full single_compute() call (graph construction,
# variable initialisation and the run itself are all included).
t = time()
res = single_compute(data)
# Parenthesised single-argument print works on both Python 2 and Python 3.
print('Single computation took %.6fs' % (time() - t))

In [None]:
# Wall-clock time for one full thread_compute() call (graph construction,
# variable initialisation, thread start-up and both runs are all included).
t = time()
res = thread_compute(data)
# Parenthesised single-argument print works on both Python 2 and Python 3.
print('Threaded computation took %.6fs' % (time() - t))