### Multi Task Learning 

The aim of multi task learning is to leverage two (or more) related tasks in the learning process, with the hope that learning one task aids performance in learning the other task(s) and thus improves predictive power for at least one (ideally all) of the tasks. 

There are two distinct flavours of MTL: Hard parameter sharing and soft parameter sharing. In this project, the former is investigated.
Hard parameter sharing involves two tasks sharing a common network which splits into task-specific paths (e.g. a series of convolutional layers followed by two paths of dense layers for two separate tasks). 

This project uses the FASHION MNIST dataset to compare MTL against individual task networks.

The two tasks to be performed:

- Task 1 - Clothing item 10 class classification (e.g. shoes, t-shirts etc) across 10 groups - $ y \in \mathbb{R}^{10} $
- Task 2 - Clothing group three class classification - predicting whether a viewed clothing image belongs to one of three groups - $ y \in \mathbb{R}^{3} $
    - These groups are Shoes (Sandal, Sneaker and Ankle Boot), Gendered (Dress, Shirt and Bag) and Uni-Sex (T-shirt, Trouser, Pullover and Coat). 



In [12]:
import numpy as np
import keras.datasets.fashion_mnist as fashion_mnist
from keras.utils import to_categorical
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import os

In [4]:
# Load MNIST Fashion dataset

def load_data():
    """Load Fashion-MNIST and derive the label sets for both tasks.

    Task 1 keeps the dataset's own 10 labels. Task 2 collapses them into
    3 groups: 0 for labels 5/7/9, 1 for labels 3/6/8 and 2 for all others.

    Returns:
        Tuple ``(train_X, train_y_1, train_y_2, test_X, test_y_1, test_y_2)``.
        Images get an extra trailing axis; every label set is passed through
        ``to_categorical`` (10 columns for task 1, 3 columns for task 2).
    """
    n_class_1 = 10
    n_class_2 = 3
    # Original label -> group id; labels missing from the table fall in group 2
    group_of = {5: 0, 7: 0, 9: 0, 3: 1, 6: 1, 8: 1}

    def to_group_labels(item_labels):
        """Translate task 1 labels into task 2 group ids."""
        return [group_of.get(int(label), 2) for label in item_labels]

    # Images come as (n, 28, 28) with integer labels of shape (n,)
    (train_X, train_y_1), (test_X, test_y_1) = fashion_mnist.load_data()

    # Derive the group labels while the item labels are still plain integers
    train_y_2 = to_group_labels(train_y_1)
    test_y_2 = to_group_labels(test_y_1)

    # Add the channel axis: (n, 28, 28) -> (n, 28, 28, 1)
    train_X = np.expand_dims(train_X, axis=3)
    test_X = np.expand_dims(test_X, axis=3)

    # One-hot encode: task 1 -> (n, 10), task 2 -> (n, 3)
    train_y_1 = to_categorical(train_y_1, n_class_1)
    train_y_2 = to_categorical(train_y_2, n_class_2)
    test_y_1 = to_categorical(test_y_1, n_class_1)
    test_y_2 = to_categorical(test_y_2, n_class_2)

    return train_X, train_y_1, train_y_2, test_X, test_y_1, test_y_2


# Load the data once; these module-level arrays are shared by all later cells
# (images, task 1 labels and task 2 labels for the train and test splits).
x_train, y_train_1, y_train_2, x_test, y_test_1, y_test_2 = load_data()

#### Task Specific Networks 

Below are two separate network classes of identical structure (except the logits and pred layers), one for each of the two tasks.

For the sake of convenience, both networks will use CNN filters - $[32, 64, 128]$. The kernel size will be 3 $\times$ 3 with a stride of 1 for all convolutional layers. Maxpooling layers are used after the first and second convolutional layers. The maxpooling layers have a kernel size of 2 and a stride of 2. 

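After the final convolution, the outputs are flattened and passed to dense layers $[1152, 1024, 100 , N]$, where 1152 ($3 \times 3 \times 128$) is the size of the flattened feature map produced by the unpadded convolutions and poolings in the code below, and $N$ is the number of outputs required (10 or 3). 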

In [5]:
class Task_1_NN():
    """Single-task CNN for task 1 (clothing item classification).

    The constructor only stores the hyper-parameters and creates the input
    placeholders. Call ``create_model``, ``compute_loss`` and ``optimizer``
    (in that order) to build the rest of the graph.

    Args:
        x_train: training images; must be 4-D because its shape is unpacked
            into four values. Batches are fed to a ``(None, 28, 28, 1)``
            placeholder.
        y_train_1: 2-D label matrix for task 1; ``shape[1]`` sets the number
            of network outputs.
        output_dir: directory the caller uses for checkpoints and summaries.
        lr: learning rate passed to Adam.
        nb_epochs: number of training epochs.
        batch_size: mini-batch size; also fixes ``nb_iterations`` per epoch.
    """

    # Variable scope that holds every trainable weight of this network
    SCOPE_NAME = "Task_1"

    def __init__(self, x_train, y_train_1,  output_dir, lr=0.001, nb_epochs=10, batch_size=50):
        self.nb_epochs = nb_epochs
        self.lr = lr
        self.batch_size = batch_size
        self.nb_images, self.edge, _, _ = x_train.shape
        # Full batches only: a trailing partial batch is skipped each epoch
        self.nb_iterations = self.nb_images // batch_size
        self.output_dir = output_dir
        self.x_train = x_train
        self.y_train_1 = y_train_1
        self.m = x_train.shape[0]
        self.n_output_1 = y_train_1.shape[1]

        self.X = tf.placeholder(tf.float32, (None, 28, 28, 1), "X")
        self.y_1 = tf.placeholder(tf.float32, (None, self.n_output_1), "y_1")

    def create_model(self):
        """Build conv(32)-pool-conv(64)-pool-conv(128)-flatten-dense(1024)-dense(100)-logits."""
        with tf.variable_scope(self.SCOPE_NAME, reuse=tf.AUTO_REUSE) as scope:
            # Remember the fully qualified scope so optimizer() can select
            # exactly the variables created here.
            self.scope_name = scope.name

            # Construct the network for task 1
            self.hlayer1 = tf.layers.conv2d(self.X, filters=32, kernel_size=3, strides=(1, 1), activation=tf.nn.relu)
            self.hlayer1 = tf.layers.max_pooling2d(self.hlayer1, pool_size=2, strides=2)
            self.hlayer2 = tf.layers.conv2d(self.hlayer1, filters=64, kernel_size=3, strides=(1, 1), activation=tf.nn.relu)
            self.hlayer2 = tf.layers.max_pooling2d(self.hlayer2, pool_size=2, strides=2)
            self.hlayer3 = tf.layers.conv2d(self.hlayer2, filters=128, kernel_size=3, strides=(1, 1), activation=tf.nn.relu)
            self.flatlayer = tf.layers.flatten(self.hlayer3)
            self.hlayer4 = tf.layers.dense(self.flatlayer, 1024, tf.nn.relu)
            self.hlayer5 = tf.layers.dense(self.hlayer4, 100, tf.nn.relu)
            # Output width follows the label matrix so it always matches the
            # y_1 placeholder (10 for the Fashion-MNIST item labels).
            self.logits = tf.layers.dense(self.hlayer5, self.n_output_1)
            self.preds = tf.nn.softmax(self.logits)

    def compute_loss(self):
        """Create the mean softmax cross-entropy loss and its scalar summary."""
        with tf.variable_scope('loss'):
            # Calculate the loss
            self.loss_task_1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.y_1, logits=self.logits))
            self.loss_summ = tf.summary.scalar("softmax_loss", self.loss_task_1)

    def optimizer(self):
        """Create the Adam training op (``self.trainer``) for the task 1 loss."""
        with tf.variable_scope('optimizer', reuse=tf.AUTO_REUSE):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5)
            # Only optimise this network's own weights; other models living in
            # the same default graph must not end up in the variable list.
            scope_prefix = getattr(self, "scope_name", self.SCOPE_NAME) + "/"
            # Fall back to all trainable variables if the scope filter finds nothing
            self.model_vars = tf.trainable_variables(scope=scope_prefix) or tf.trainable_variables()
            self.trainer = optimizer.minimize(self.loss_task_1, var_list=self.model_vars)
            


In [6]:
class Task_2_NN():
    """Single-task CNN for task 2 (clothing group classification).

    The constructor only stores the hyper-parameters and creates the input
    placeholders. Call ``create_model``, ``compute_loss`` and ``optimizer``
    (in that order) to build the rest of the graph.

    Args:
        x_train: training images; must be 4-D because its shape is unpacked
            into four values. Batches are fed to a ``(None, 28, 28, 1)``
            placeholder.
        y_train_2: 2-D label matrix for task 2; ``shape[1]`` sets the number
            of network outputs.
        output_dir: directory the caller uses for checkpoints and summaries.
        lr: learning rate passed to Adam.
        nb_epochs: number of training epochs.
        batch_size: mini-batch size; also fixes ``nb_iterations`` per epoch.
    """

    # Variable scope that holds every trainable weight of this network
    SCOPE_NAME = "Task_2"

    def __init__(self, x_train, y_train_2,  output_dir, lr=0.001, nb_epochs=10, batch_size=50):
        self.nb_epochs = nb_epochs
        self.lr = lr
        self.batch_size = batch_size
        self.nb_images, self.edge, _, _ = x_train.shape
        # Full batches only: a trailing partial batch is skipped each epoch
        self.nb_iterations = self.nb_images // batch_size
        self.output_dir = output_dir
        self.x_train = x_train
        self.y_train_2 = y_train_2
        self.m = x_train.shape[0]
        self.n_output_2 = y_train_2.shape[1]

        self.X = tf.placeholder(tf.float32, (None, 28, 28, 1), "X")
        self.y_2 = tf.placeholder(tf.float32, (None, self.n_output_2), "y_2")

    def create_model(self):
        """Build conv(32)-pool-conv(64)-pool-conv(128)-flatten-dense(1024)-dense(100)-logits."""
        with tf.variable_scope(self.SCOPE_NAME, reuse=tf.AUTO_REUSE) as scope:
            # Remember the fully qualified scope so optimizer() can select
            # exactly the variables created here.
            self.scope_name = scope.name

            # Construct network for task 2
            self.hlayer1 = tf.layers.conv2d(self.X, filters=32, kernel_size=3, strides=(1, 1), activation=tf.nn.relu)
            self.hlayer1 = tf.layers.max_pooling2d(self.hlayer1, pool_size=2, strides=2)
            self.hlayer2 = tf.layers.conv2d(self.hlayer1, filters=64, kernel_size=3, strides=(1, 1), activation=tf.nn.relu)
            self.hlayer2 = tf.layers.max_pooling2d(self.hlayer2, pool_size=2, strides=2)
            self.hlayer3 = tf.layers.conv2d(self.hlayer2, filters=128, kernel_size=3, strides=(1, 1), activation=tf.nn.relu)
            self.flatlayer = tf.layers.flatten(self.hlayer3)
            self.hlayer4 = tf.layers.dense(self.flatlayer, 1024, tf.nn.relu)
            self.hlayer5 = tf.layers.dense(self.hlayer4, 100, tf.nn.relu)
            # Output width follows the label matrix so it always matches the
            # y_2 placeholder (3 for the clothing groups).
            self.logits = tf.layers.dense(self.hlayer5, self.n_output_2)
            self.preds = tf.nn.softmax(self.logits)

    def compute_loss(self):
        """Create the mean softmax cross-entropy loss and its scalar summary."""
        with tf.variable_scope('loss'):
            # Calculate the loss
            self.loss_task_2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.y_2, logits=self.logits))
            self.loss_summ = tf.summary.scalar("softmax_loss", self.loss_task_2)

    def optimizer(self):
        """Create the Adam training op (``self.trainer``) for the task 2 loss."""
        with tf.variable_scope('optimizer', reuse=tf.AUTO_REUSE):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5)
            # Only optimise this network's own weights; other models living in
            # the same default graph must not end up in the variable list.
            scope_prefix = getattr(self, "scope_name", self.SCOPE_NAME) + "/"
            # Fall back to all trainable variables if the scope filter finds nothing
            self.model_vars = tf.trainable_variables(scope=scope_prefix) or tf.trainable_variables()
            self.trainer = optimizer.minimize(self.loss_task_2, var_list=self.model_vars)

#### Train and evaluate the model for task 1

In [7]:
# Per-iteration training accuracy of the two single-task networks
train_acc_task_1, train_acc_task_2 = [], []

# Per-epoch accuracy on the full test set
test_acc_task_1, test_acc_task_2 = [], []

# Per-iteration training loss
loss_task_1, loss_task_2 = [], []

In [None]:
%%time
# Initialise model for task 1
model_1 = Task_1_NN(x_train, y_train_1, './Task1_logdir/', 0.001, 10, 200)
model_1.create_model()
model_1.compute_loss()
# Build the training op exactly once; a second call would only add another,
# redundant Adam optimizer to the graph.
model_1.optimizer()

init = (tf.global_variables_initializer(),
        tf.local_variables_initializer())

saver = tf.train.Saver()
summary = tf.Summary()

# Make sure the log directory exists before anything is written into it
os.makedirs(model_1.output_dir, exist_ok=True)

sess = tf.InteractiveSession()
sess.run(init)
writer = tf.summary.FileWriter(model_1.output_dir)
writer.add_graph(sess.graph)

# Train the model for model_1.nb_epochs epochs

for epoch in range(model_1.nb_epochs):
    # Shuffle training exemplars (images and labels with the same permutation)
    randomize = np.arange(x_train.shape[0])
    np.random.shuffle(randomize)
    x_in = model_1.x_train[randomize,:]
    y_in_1 = model_1.y_train_1[randomize,:]

    for i in range(model_1.nb_iterations):
        batch_start = i * model_1.batch_size
        batch_end = batch_start + model_1.batch_size
        input_x_train = x_in[batch_start:batch_end]
        input_y_train_1 = y_in_1[batch_start:batch_end]
        _ , preds_1, loss_1, loss_summ = sess.run([model_1.trainer, model_1.preds,  model_1.loss_task_1, model_1.loss_summ],
                                 feed_dict={model_1.X: input_x_train,
                                            model_1.y_1: input_y_train_1})

        y_preds_1 = np.argmax(preds_1, axis=1)
        y_real_1 = np.argmax(input_y_train_1, axis=1)
        acc_train_1 = np.mean(y_preds_1 == y_real_1)
        # Record loss and train accuracy
        train_acc_task_1.append(acc_train_1)
        loss_task_1.append(loss_1)
        #print('Epoch %d, Iteration %d, loss_1 %.3f,  batch accuracy_1 %.3f' %(epoch, i, loss_1,acc_train_1))
        writer.add_summary(loss_summ, epoch * model_1.nb_iterations + i)
    # NOTE(review): the save path is the directory string itself, so the
    # checkpoint files presumably get an empty base name - confirm this is wanted.
    saver.save(sess, model_1.output_dir, global_step=epoch)
    # Push the buffered summaries to disk once per epoch
    writer.flush()

    # Testing accuracy for entire test set each epoch
    preds = sess.run(model_1.preds, feed_dict={model_1.X: x_test})
    y_preds = np.argmax(preds, axis=1)
    y_real = np.argmax(y_test_1, axis=1)
    acc_test = np.mean(y_preds == y_real)
    test_acc_task_1.append(acc_test)
    

In [None]:
# Compute the final test accuracy of the task 1 model

batch_size_test = 20
nb_test_points = x_test.shape[0]
# Ceiling division: a trailing partial batch is evaluated exactly once.
# (Slicing past the end of x_test simply yields the remaining samples.)
nb_iterations = -(-nb_test_points // batch_size_test)
preds_1 = []
for i in range(nb_iterations):
    input_x_test = x_test[i*batch_size_test: (i+1)*batch_size_test]
    preds_test_1 = sess.run(model_1.preds,
                             feed_dict={model_1.X: input_x_test})
    preds_1.append(np.argmax(preds_test_1, axis=1))
all_preds_1 = np.concatenate(preds_1, axis=0)
y_real_1 = np.argmax(y_test_1, axis=1)
print(all_preds_1)
print(y_real_1)
acc_test_1 = np.mean(all_preds_1 == y_real_1)
print('Test accuracy - task 1 achieved: %.3f' %acc_test_1)

In [None]:
# Close the interactive session used to train and evaluate the task 1 model
sess.close()

#### Train and evaluate the model for task 2

In [None]:
%%time
# Initialise model for task 2
model_2 = Task_2_NN(x_train, y_train_2, './Task2_logdir/', 0.001, 10, 200)
model_2.create_model()
model_2.compute_loss()
# Build the training op exactly once; a second call would only add another,
# redundant Adam optimizer to the graph.
model_2.optimizer()

init = (tf.global_variables_initializer(),
        tf.local_variables_initializer())

saver = tf.train.Saver()
summary = tf.Summary()

# Make sure the log directory exists before anything is written into it
os.makedirs(model_2.output_dir, exist_ok=True)

sess = tf.InteractiveSession()
sess.run(init)
writer = tf.summary.FileWriter(model_2.output_dir)
writer.add_graph(sess.graph)

# Train the model for model_2.nb_epochs epochs

for epoch in range(model_2.nb_epochs):

    # Shuffle training exemplars (images and labels with the same permutation)
    randomize = np.arange(x_train.shape[0])
    np.random.shuffle(randomize)
    x_in = model_2.x_train[randomize,:]
    y_in_2 = model_2.y_train_2[randomize,:]

    for i in range(model_2.nb_iterations):
        batch_start = i * model_2.batch_size
        batch_end = batch_start + model_2.batch_size
        input_x_train = x_in[batch_start:batch_end]
        input_y_train_2 = y_in_2[batch_start:batch_end]
        _ , preds_2, loss_2, loss_sum = sess.run([model_2.trainer, model_2.preds,  model_2.loss_task_2, model_2.loss_summ],
                                 feed_dict={model_2.X: input_x_train,
                                            model_2.y_2: input_y_train_2})
        y_preds_2 = np.argmax(preds_2, axis=1)
        y_real_2 = np.argmax(input_y_train_2, axis=1)
        acc_train_2 = np.mean(y_preds_2 == y_real_2)

        # Record loss and train accuracy
        train_acc_task_2.append(acc_train_2)
        loss_task_2.append(loss_2)
        #print('Epoch %d, Iteration %d, loss_2 %.3f, batch accuracy_2 %.3f' %(epoch, i, loss_2,acc_train_2))
        writer.add_summary(loss_sum, epoch * model_2.nb_iterations + i)
    # NOTE(review): the save path is the directory string itself, so the
    # checkpoint files presumably get an empty base name - confirm this is wanted.
    saver.save(sess, model_2.output_dir, global_step=epoch)
    # Push the buffered summaries to disk once per epoch
    writer.flush()

    # Testing accuracy for entire test set each epoch
    preds = sess.run(model_2.preds, feed_dict={model_2.X: x_test})
    y_preds = np.argmax(preds, axis=1)
    y_real = np.argmax(y_test_2, axis=1)
    acc_test = np.mean(y_preds == y_real)
    test_acc_task_2.append(acc_test)

In [None]:
# Compute the final test accuracy of the task 2 model

batch_size_test = 20
nb_test_points = x_test.shape[0]
# Ceiling division: a trailing partial batch is evaluated exactly once.
# (Slicing past the end of x_test simply yields the remaining samples.)
nb_iterations = -(-nb_test_points // batch_size_test)
preds_2 = []
for i in range(nb_iterations):
    input_x_test = x_test[i*batch_size_test: (i+1)*batch_size_test]
    # Fetch the tensor directly (not wrapped in a list) so the result keeps
    # its (batch, classes) layout for the argmax below.
    preds_test_2 = sess.run(model_2.preds,
                             feed_dict={model_2.X: input_x_test})
    preds_2.append(np.argmax(preds_test_2, axis=1))
all_preds_2 = np.concatenate(preds_2, axis=0)
y_real_2 = np.argmax(y_test_2, axis=1)
acc_test_2 = np.mean(all_preds_2 == y_real_2)

print('Test accuracy - task 2 achieved: %.3f' %acc_test_2)


In [None]:
# Close the interactive session used to train and evaluate the task 2 model
sess.close()

#### Multi-Task Learning 

Here we build the multi-task learning network.

Model inputs:
 - x_train, the training matrix
 - y_train_1, the fashion labels for task 1 (Fashion Item  classification)
 - y_train_2, the labels for task 2 (Fashion Group classification)
 - $\lambda \in [0,1]$, lambda_, the loss weight for task 1; ($1-\lambda$) is the loss weight for task 2
 - output_dir, the directory where model parameters and tensorboard event files will be stored. 
 - lr, the learning rate for ADAM
 - nb_epochs, the number of epochs to use
 - batch_size, the number of data points in each mini-batch

The Multi Task Learning architecture is comprised of a shared convolutional backbone of three layers and a single shared fully connected layer with pooling between the first two pairs of convolutions.  The output of the shared fully connected layer is passed to two series of task specific dense layers, one for each of the two tasks. 

The architecture is as follows:
 - Shared Convolutional layers $[32, 64, 128]$ with max pooling after the first and second conv layers
     - kernel size ($ 3 \times 3$) for conv and ($2 \times 2$) for max pool
     - stride 1 for conv and 2 for max pooling
 - Flatten 
 - Shared Dense Layer $[1024]$ (fed by the flattened convolutional features) - the outputs of which are passed to the two task dense layers
 - Task 1 Dense Layers $[100, 10]$ - 10 is the dimension of the logits/preds
 - Task 2 Dense Layers $[100, 3]$ - 3 is the dimension of the logits/preds 
 - Task 1 Activation Layer - as earlier we use softmax
 - Task 2 Activation Layer  - as earlier we use softmax
 
The total loss, which is a sum of the weighted losses for tasks 1 and 2 ($\lambda * L_1 + (1-\lambda) * L_2$) is passed to the optimiser. In the following experiments $\lambda$ is set to 0.5 i.e. each loss contributes equally to the total loss.

In [14]:
class MTL:
    """Hard-parameter-sharing multi-task network for tasks 1 and 2.

    Three convolutional layers and one dense layer are shared; each task then
    has its own dense(100) layer followed by a logits layer. The optimised
    loss is ``lambda_ * loss_task_1 + (1 - lambda_) * loss_task_2``.

    The constructor only stores the hyper-parameters and creates the input
    placeholders. Call ``create_model``, ``compute_loss`` and ``optimizer``
    (in that order) to build the rest of the graph.

    Args:
        x_train: training images; must be 4-D because its shape is unpacked
            into four values. Batches are fed to a ``(None, 28, 28, 1)``
            placeholder.
        y_train_1: 2-D label matrix for task 1; ``shape[1]`` sets the number
            of task 1 outputs.
        y_train_2: 2-D label matrix for task 2; ``shape[1]`` sets the number
            of task 2 outputs.
        lambda_: weight of the task 1 loss; task 2 gets ``1 - lambda_``.
        output_dir: directory the caller uses for checkpoints and summaries.
        lr: learning rate passed to Adam.
        nb_epochs: number of training epochs.
        batch_size: mini-batch size; also fixes ``nb_iterations`` per epoch.
    """

    # Variable scope that holds every trainable weight of this network
    SCOPE_NAME = "MTL"

    def __init__(self, x_train, y_train_1, y_train_2, lambda_, output_dir, lr=0.001, nb_epochs=10, batch_size=50):
        self.nb_epochs = nb_epochs
        self.lr = lr
        self.batch_size = batch_size
        self.nb_images, self.edge, _, _ = x_train.shape
        # Full batches only: a trailing partial batch is skipped each epoch
        self.nb_iterations = self.nb_images // batch_size
        self.output_dir = output_dir
        self.x_train = x_train
        self.y_train_1 = y_train_1
        self.y_train_2 = y_train_2
        self.lambda_ = lambda_

        self.m = x_train.shape[0]
        self.n_output_1 = y_train_1.shape[1]
        self.n_output_2 = y_train_2.shape[1]

        self.X = tf.placeholder(tf.float32, (None, 28, 28, 1), "X")
        self.y_1 = tf.placeholder(tf.float32, (None, self.n_output_1), "y_1")
        self.y_2 = tf.placeholder(tf.float32, (None, self.n_output_2), "y_2")

    def create_model(self):
        """Build the shared trunk and the two task-specific heads."""
        with tf.variable_scope(self.SCOPE_NAME, reuse=tf.AUTO_REUSE) as scope:
            # Remember the fully qualified scope so optimizer() can select
            # exactly the variables created here.
            self.scope_name = scope.name

            # Shared part of the network
            self.hlayer1 = tf.layers.conv2d(self.X, filters=32, kernel_size=3, strides=(1, 1), activation=tf.nn.relu)
            self.hlayer1 = tf.layers.max_pooling2d(self.hlayer1, pool_size=2, strides=2)
            self.hlayer2 = tf.layers.conv2d(self.hlayer1, filters=64, kernel_size=3, strides=(1, 1), activation=tf.nn.relu)
            self.hlayer2 = tf.layers.max_pooling2d(self.hlayer2, pool_size=2, strides=2)
            self.hlayer3 = tf.layers.conv2d(self.hlayer2, filters=128, kernel_size=3, strides=(1, 1), activation=tf.nn.relu)
            self.flatlayer = tf.layers.flatten(self.hlayer3)
            self.hlayer4 = tf.layers.dense(self.flatlayer, 1024, tf.nn.relu)

            # Task 1 specific split; output width follows the y_1 placeholder
            self.hlayer_task_1 = tf.layers.dense(self.hlayer4, 100, tf.nn.relu)
            self.logits_1 = tf.layers.dense(self.hlayer_task_1, self.n_output_1)
            self.pred_1 = tf.nn.softmax(self.logits_1)

            # Task 2 specific split; output width follows the y_2 placeholder
            self.hlayer_task_2 = tf.layers.dense(self.hlayer4, 100, tf.nn.relu)
            self.logits_2 = tf.layers.dense(self.hlayer_task_2, self.n_output_2)
            self.pred_2 = tf.nn.softmax(self.logits_2)

    def compute_loss(self):
        """Create both task losses, their weighted sum and the scalar summaries."""
        with tf.variable_scope('loss'):
            # Compute the loss for each task and calculate weighted sum to give total loss
            self.loss_task_1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.y_1, logits=self.logits_1))

            self.loss_task_2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.y_2, logits=self.logits_2))

            self.loss_total = self.lambda_*self.loss_task_1 + (1-self.lambda_)*self.loss_task_2

            self.loss_task_1_graph = tf.summary.scalar("softmax_loss_task_1", self.loss_task_1)
            self.loss_task_2_graph = tf.summary.scalar("softmax_loss_task_2", self.loss_task_2)
            self.loss_sum = tf.summary.scalar("softmax_loss", self.loss_total)

    def optimizer(self):
        """Create the Adam training op (``self.trainer``) for the total loss."""
        with tf.variable_scope('optimizer', reuse=tf.AUTO_REUSE):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5)
            # Only optimise this network's own weights; other models living in
            # the same default graph must not end up in the variable list.
            scope_prefix = getattr(self, "scope_name", self.SCOPE_NAME) + "/"
            # Fall back to all trainable variables if the scope filter finds nothing
            self.model_vars = tf.trainable_variables(scope=scope_prefix) or tf.trainable_variables()
            self.trainer = optimizer.minimize(self.loss_total, var_list=self.model_vars)

In [None]:
# Initialise Multi Task Learning Model

# Start from an empty graph so the single-task networks built earlier are not
# part of the MTL graph.
tf.reset_default_graph()
model = MTL(x_train, y_train_1, y_train_2, 0.5, './MTL_logdir/', 0.001, 10, 200)
model.create_model()
model.compute_loss()
# Build the training op exactly once; a second call would only add another,
# redundant Adam optimizer to the graph.
model.optimizer()

init = (tf.global_variables_initializer(),
        tf.local_variables_initializer())

saver = tf.train.Saver()
summary = tf.Summary()

# Make sure the log directory exists before anything is written into it
os.makedirs(model.output_dir, exist_ok=True)

sess = tf.InteractiveSession()
sess.run(init)
writer = tf.summary.FileWriter(model.output_dir)
writer.add_graph(sess.graph)

#### Train Multi Task Learning model and evaluate on test set

In [None]:
# Containers for the MTL run, one per task:
# per-iteration training accuracy
train_acc_MTL_task_1, train_acc_MTL_task_2 = [], []

# per-epoch accuracy on the full test set
test_acc_MTL_task_1, test_acc_MTL_task_2 = [], []

# per-iteration training loss
loss_MTL_task_1, loss_MTL_task_2 = [], []

In [None]:
%%time
# Train Multi Task Learning model for model.nb_epochs epochs

for epoch in range(model.nb_epochs):
    # Shuffle images and both label sets with the same permutation
    randomize = np.arange(x_train.shape[0])
    np.random.shuffle(randomize)
    x_in = model.x_train[randomize,:]
    y_in_1 = model.y_train_1[randomize,:]
    y_in_2 = model.y_train_2[randomize,:]
    for i in range(model.nb_iterations):
        batch_start = i * model.batch_size
        batch_end = batch_start + model.batch_size
        input_x_train = x_in[batch_start:batch_end]
        input_y_train_1 = y_in_1[batch_start:batch_end]
        input_y_train_2 = y_in_2[batch_start:batch_end]
        # Besides the total-loss summary, also fetch the per-task loss
        # summaries the model defines so they reach the event file too.
        _ , preds_1, preds_2, loss_1, loss_2, loss_summ, loss_summ_1, loss_summ_2 = sess.run(
            [model.trainer, model.pred_1, model.pred_2,
             model.loss_task_1, model.loss_task_2,
             model.loss_sum, model.loss_task_1_graph, model.loss_task_2_graph],
            feed_dict={model.X: input_x_train,
                       model.y_1: input_y_train_1,
                       model.y_2: input_y_train_2})

        y_preds_1 = np.argmax(preds_1, axis=1)
        y_preds_2 = np.argmax(preds_2, axis=1)
        y_real_1 = np.argmax(input_y_train_1, axis=1)
        y_real_2 = np.argmax(input_y_train_2, axis=1)
        acc_train_1 = np.mean(y_preds_1 == y_real_1)
        acc_train_2 = np.mean(y_preds_2 == y_real_2)

        # Record loss and train accuracy
        train_acc_MTL_task_1.append(acc_train_1)
        train_acc_MTL_task_2.append(acc_train_2)

        loss_MTL_task_1.append(loss_1)
        loss_MTL_task_2.append(loss_2)
        #print('Epoch %d, Iteration %d, loss_1 %.3f, loss_2 %.3f, batch accuracy_1 %.3f, batch accuracy_2 %.3f' %(epoch, i, loss_1, loss_2, acc_train_1, acc_train_2))
        global_step = epoch * model.nb_iterations + i
        writer.add_summary(loss_summ, global_step)
        writer.add_summary(loss_summ_1, global_step)
        writer.add_summary(loss_summ_2, global_step)
    saver.save(sess, model.output_dir, global_step=epoch)
    # Push the buffered summaries to disk once per epoch
    writer.flush()

    # Testing accuracy for entire test set each epoch
    pred_1, pred_2 = sess.run([model.pred_1, model.pred_2], feed_dict={model.X: x_test})

    y_pred_1 = np.argmax(pred_1, axis=1)
    y_real_1 = np.argmax(y_test_1, axis=1)
    acc_test_1 = np.mean(y_pred_1 == y_real_1)
    test_acc_MTL_task_1.append(acc_test_1)

    y_pred_2 = np.argmax(pred_2, axis=1)
    y_real_2 = np.argmax(y_test_2, axis=1)
    acc_test_2 = np.mean(y_pred_2 == y_real_2)
    test_acc_MTL_task_2.append(acc_test_2)



In [None]:
# Compute test accuracy for final model

batch_size_test = 20
nb_test_points = x_test.shape[0]
# Ceiling division: a trailing partial batch is evaluated exactly once.
# (Slicing past the end of x_test simply yields the remaining samples.)
nb_iterations = -(-nb_test_points // batch_size_test)
preds_1 = []
preds_2 = []
for i in range(nb_iterations):
    input_x_test = x_test[i*batch_size_test: (i+1)*batch_size_test]
    preds_test_1, preds_test_2 = sess.run([model.pred_1, model.pred_2],
                             feed_dict={model.X: input_x_test})
    preds_1.append(np.argmax(preds_test_1, axis=1))
    preds_2.append(np.argmax(preds_test_2, axis=1))
all_preds_1 = np.concatenate(preds_1, axis=0)
all_preds_2 = np.concatenate(preds_2, axis=0)
y_real_1 = np.argmax(y_test_1, axis=1)
y_real_2 = np.argmax(y_test_2, axis=1)
acc_test_1 = np.mean(all_preds_1 == y_real_1)
acc_test_2 = np.mean(all_preds_2 == y_real_2)
print('Test accuracy - task 1 achieved: %.3f' %acc_test_1)
print('Test accuracy - task 2 achieved: %.3f' %acc_test_2)

#### Plotting the performance of Multi Task Learning vs Task Specific Networks

In [15]:
#Function for smoothing data
def smooth(array, mov_avg_size, step_size):
    """Return the moving average of *array* along its first axis.

    A window of ``mov_avg_size`` consecutive entries is averaged, then moved
    forward by ``step_size`` entries. Only complete windows are used, so the
    result is empty when the input is shorter than the window.

    Args:
        array: sequence or NumPy array to smooth (converted with
            ``np.asarray``).
        mov_avg_size: number of entries per window; must be positive.
        step_size: distance between the starts of consecutive windows; must
            be positive.

    Returns:
        A list with one average per window.

    Raises:
        ValueError: if ``mov_avg_size`` or ``step_size`` is not positive.
    """
    if mov_avg_size <= 0:
        raise ValueError("mov_avg_size must be a positive integer")
    if step_size <= 0:
        # A non-positive step would never advance the window
        raise ValueError("step_size must be a positive integer")

    array = np.asarray(array)
    N = array.shape[0]

    # Window starts are exactly those with start + mov_avg_size <= N
    return [
        array[start:start + mov_avg_size].sum(axis=0) / mov_avg_size
        for start in range(0, N - mov_avg_size + 1, step_size)
    ]

In [None]:
# Plot of network performance:
# iteration vs. smoothed training accuracy for the individual tasks and MTL
fig, ax = plt.subplots(figsize=(15,8))

# Moving average (window 200, step 1) over the per-iteration batch accuracies
(train_acc_task_1_smooth,
 train_acc_task_2_smooth,
 train_acc_MTL_task_1_smooth,
 train_acc_MTL_task_2_smooth) = [
    smooth(np.array(history), 200, 1)
    for history in (train_acc_task_1, train_acc_task_2,
                    train_acc_MTL_task_1, train_acc_MTL_task_2)
]

# One curve per network/task, drawn in the same order as the legend
curves = (
    (train_acc_task_1_smooth, 'Task 1'),
    (train_acc_task_2_smooth, 'Task 2'),
    (train_acc_MTL_task_1_smooth, 'MTL Task 1'),
    (train_acc_MTL_task_2_smooth, 'MTL Task 2'),
)
for curve, curve_label in curves:
    ax.plot(curve, label=curve_label)

ax.set_xlabel('Iterations')
ax.set_ylabel('Training Accuracy')
ax.set_title('Iteration vs. Training Accuracy for Individual Tasks and Multi-Task Learning')
ax.legend()
# Optional zoom: plt.ylim(0.9,1.0) / plt.xlim(0,2800)
plt.show()

In [None]:
# Plot of network performance:
# epoch vs. test accuracy for the individual tasks and MTL.
# NOTE(review): this cell used to be an exact copy of the training-accuracy
# plot above, while the per-epoch test accuracies were recorded but never
# shown; it now plots those instead - confirm this matches the intent.
fig, ax = plt.subplots(figsize=(15,8))

curves = (
    (test_acc_task_1, 'Task 1'),
    (test_acc_task_2, 'Task 2'),
    (test_acc_MTL_task_1, 'MTL Task 1'),
    (test_acc_MTL_task_2, 'MTL Task 2'),
)
for history, curve_label in curves:
    # One value per epoch, so no smoothing is applied; epochs are numbered from 1
    epochs = np.arange(1, len(history) + 1)
    ax.plot(epochs, history, marker='o', label=curve_label)

ax.set_xlabel('Epochs')
ax.set_ylabel('Test Accuracy')
ax.set_title('Epoch vs. Test Accuracy for Individual Tasks and Multi-Task Learning')
ax.legend()
plt.show()

In [None]:
# Plot of network performance:
# iteration vs. smoothed network loss for the individual tasks and MTL
fig, ax = plt.subplots(figsize=(15,8))

# Moving average (window 200, step 1) over the per-iteration losses
(loss_task_1_smooth,
 loss_task_2_smooth,
 loss_MTL_task_1_smooth,
 loss_MTL_task_2_smooth) = [
    smooth(np.array(history), 200, 1)
    for history in (loss_task_1, loss_task_2,
                    loss_MTL_task_1, loss_MTL_task_2)
]

# One curve per network/task, drawn in the same order as the legend
curves = (
    (loss_task_1_smooth, 'Task 1'),
    (loss_task_2_smooth, 'Task 2'),
    (loss_MTL_task_1_smooth, 'MTL Task 1'),
    (loss_MTL_task_2_smooth, 'MTL Task 2'),
)
for curve, curve_label in curves:
    ax.plot(curve, label=curve_label)

ax.set_xlabel('Iterations')
ax.set_ylabel('Network Loss')
ax.set_title('Iteration vs. Network Loss for Individual Tasks and Multi-Task Learning')
ax.legend()
# Optional zoom: plt.ylim(0.9,1.0)
# Start the x axis at 1
plt.xlim(1)
plt.show()