# Using the MANN Package to train a Computer Vision and Fully Connected Multitask Neural Network

In this notebook, the MANN package is used to train a single multitask network on three tasks. Two of them are computer vision tasks, the MNIST Digit and MNIST Fashion datasets, which use a convolutional architecture that feeds into a few fully-connected layers. The third is a tabular task, the Boston Housing Price dataset, which uses only a fully-connected architecture.

In [None]:
# Load the MinMaxScaler from Scikit Learn, TensorFlow, numpy, and MANN
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import numpy as np
import mann

In [None]:
# Fetch the two image classification datasets (MNIST digits and Fashion MNIST)
(digit_x_train, digit_y_train), (digit_x_test, digit_y_test) = tf.keras.datasets.mnist.load_data()
(fashion_x_train, fashion_y_train), (fashion_x_test, fashion_y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Give every image array a trailing channel axis and divide by 255 so all values are in [0, 1]
digit_x_train, digit_x_test, fashion_x_train, fashion_x_test = (
    images[..., np.newaxis] / 255
    for images in (digit_x_train, digit_x_test, fashion_x_train, fashion_x_test)
)

# Fetch the Boston housing data
(boston_x_train, boston_y_train), (boston_x_test, boston_y_test) = tf.keras.datasets.boston_housing.load_data()

# The Boston targets are turned into single-column arrays before they are scaled below
boston_y_train, boston_y_test = boston_y_train.reshape(-1, 1), boston_y_test.reshape(-1, 1)

# Min-max scale the Boston features; the scaler is fit on the training split only
# and then reused unchanged on the test split
boston_x_scaler = MinMaxScaler()
boston_x_train = boston_x_scaler.fit_transform(boston_x_train)
boston_x_test = boston_x_scaler.transform(boston_x_test)

# Same treatment for the Boston targets, with their own scaler
boston_y_scaler = MinMaxScaler()
boston_y_train = boston_y_scaler.fit_transform(boston_y_train)
boston_y_test = boston_y_scaler.transform(boston_y_test)

# Make every target array a single column
(
    digit_y_train, digit_y_test,
    fashion_y_train, fashion_y_test,
    boston_y_train, boston_y_test,
) = (
    targets.reshape(-1, 1)
    for targets in (
        digit_y_train, digit_y_test,
        fashion_y_train, fashion_y_test,
        boston_y_train, boston_y_test,
    )
)

# Early-stopping callback used when training the image tasks
callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights = True,
    patience = 3,
    min_delta = 0.01
)

# Create the model

In [None]:
# Create the input layers, one per task; the shapes come from the training arrays
digit_input = tf.keras.layers.Input(digit_x_train.shape[1:])
fashion_input = tf.keras.layers.Input(fashion_x_train.shape[1:])
boston_input = tf.keras.layers.Input(boston_x_train.shape[1:])


def _conv_block(inputs, filters):
    """Apply two multitask 3x3 convolutions followed by a multitask 2x2 max pool.

    Parameters
    ----------
    inputs : list of tensors
        The per-task inputs handed to the first ``MultiMaskedConv2D`` layer.
    filters : int
        Number of filters used by both convolutional layers in the block.

    Returns
    -------
    The output of the ``MultiMaxPool2D`` layer (presumably one tensor per
    task, since ``SelectorLayer`` is later used to pick them out by index).
    """
    for _ in range(2):
        inputs = mann.layers.MultiMaskedConv2D(
            filters = filters,
            kernel_size = 3,
            padding = 'same',
            strides = 1,
            activation = 'relu'
        )(inputs)
    return mann.layers.MultiMaxPool2D(
        pool_size = 2,
        strides = 1,
        padding = 'valid'
    )(inputs)


# Create the convolutional blocks for the image data.
# The second block consumes the output of the first one. Feeding it the raw
# inputs again would silently drop the whole 32-filter block from the model,
# because the variable holding the first block's output would be overwritten.
image_x = _conv_block([digit_input, fashion_input], filters = 32)
image_x = _conv_block(image_x, filters = 64)

# Pull out each image task's branch (index 0 = digit, 1 = fashion, matching the
# order of the inputs above) so it can be flattened with a standard Keras layer
digit_selector = mann.layers.SelectorLayer(0)(image_x)
fashion_selector = mann.layers.SelectorLayer(1)(image_x)
digit_flatten = tf.keras.layers.Flatten()(digit_selector)
fashion_flatten = tf.keras.layers.Flatten()(fashion_selector)
image_x = mann.layers.MultiMaskedDense(256, activation = 'relu')([digit_flatten, fashion_flatten])

# Separate the image branches again and build the fully-connected stack for the tabular task
digit_x = mann.layers.SelectorLayer(0)(image_x)
fashion_x = mann.layers.SelectorLayer(1)(image_x)
boston_x = mann.layers.MaskedDense(256, activation = 'relu')(boston_input)
boston_x = mann.layers.MaskedDense(256, activation = 'relu')(boston_x)
boston_x = mann.layers.MaskedDense(256, activation = 'relu')(boston_x)

# One multitask dense layer that takes all three task branches
x = mann.layers.MultiMaskedDense(256, activation = 'relu')([digit_x, fashion_x, boston_x])

digit_selector = mann.layers.SelectorLayer(0)(x)
fashion_selector = mann.layers.SelectorLayer(1)(x)
boston_selector = mann.layers.SelectorLayer(2)(x)

# Task heads: 10-way softmax for each image task, a single unit for the Boston task
digit_output = mann.layers.MaskedDense(10, activation = 'softmax')(digit_selector)
fashion_output = mann.layers.MaskedDense(10, activation = 'softmax')(fashion_selector)
boston_output = mann.layers.MaskedDense(1, activation = 'relu')(boston_selector)

# Assemble and compile the three-input / three-output model (one loss per output, in order)
model = tf.keras.models.Model([digit_input, fashion_input, boston_input], [digit_output, fashion_output, boston_output])
model.compile(
    loss = ['sparse_categorical_crossentropy', 'sparse_categorical_crossentropy', 'mae'],
    optimizer = 'adam'
)
model.summary()

In [None]:
# The image datasets are truncated to the number of Boston training rows
# (presumably so that all three inputs given to mask_model have matching sample counts)
n_boston_rows = boston_x_train.shape[0]
masking_inputs = [
    digit_x_train[:n_boston_rows, :],
    fashion_x_train[:n_boston_rows, :],
    boston_x_train,
]
masking_targets = [
    digit_y_train[:n_boston_rows, :],
    fashion_y_train[:n_boston_rows, :],
    boston_y_train,
]

# Mask the model using the 'gradients' method with exclusive masks.
# NOTE(review): 80 is passed positionally as the second argument; presumably
# the masking percentile. Confirm against the MANN documentation.
model = mann.utils.mask_model(
    model,
    80,
    method = 'gradients',
    exclusive = True,
    x = masking_inputs,
    y = masking_targets
)

# Recompile so that only the two image tasks contribute to the total loss;
# the Boston loss is given a weight of 0
model.compile(
    optimizer = 'adam',
    loss = ['sparse_categorical_crossentropy', 'sparse_categorical_crossentropy', 'mae'],
    loss_weights = [1, 1, 0]
)

In [None]:
# Fit the digit and fashion tasks.
# The model still needs a third input and target, so zero-filled placeholders
# stand in for the Boston data (its loss weight is 0 at this stage).
boston_placeholder_x = np.zeros((digit_x_train.shape[0], boston_x_train.shape[1]))
boston_placeholder_y = np.zeros(digit_y_train.shape[0])

model.fit(
    [digit_x_train, fashion_x_train, boston_placeholder_x],
    [digit_y_train, fashion_y_train, boston_placeholder_y],
    batch_size = 128,
    epochs = 100,
    validation_split = 0.2,
    callbacks = [callback]
)

In [None]:
# Switch the loss weighting so that only the third (Boston) output is trained
model.compile(
    optimizer = 'adam',
    loss_weights = [0, 0, 1],
    loss = ['sparse_categorical_crossentropy', 'sparse_categorical_crossentropy', 'mae']
)

# Replace the early-stopping callback with one that uses a smaller min_delta for this task
callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights = True,
    patience = 3,
    min_delta = 0.005
)

In [None]:
# Fit the Boston task.
# The image inputs and targets are cut down to the number of Boston training
# rows so that all three inputs have the same length; their loss weights are 0 here.
n_boston_rows = boston_x_train.shape[0]
boston_stage_inputs = [
    digit_x_train[:n_boston_rows],
    fashion_x_train[:n_boston_rows],
    boston_x_train,
]
boston_stage_targets = [
    digit_y_train[:n_boston_rows],
    fashion_y_train[:n_boston_rows],
    boston_y_train,
]

model.fit(
    boston_stage_inputs,
    boston_stage_targets,
    batch_size = 32,
    epochs = 100,
    validation_split = 0.2,
    callbacks = [callback]
)

In [None]:
# Predict on the full image test sets; the Boston input is a zero-filled
# placeholder and its output from this pass is discarded
boston_placeholder_x = np.zeros((digit_x_test.shape[0], boston_x_test.shape[1]))
digit_probs, fashion_probs = model.predict([digit_x_test, fashion_x_test, boston_placeholder_x])[:2]

# Predict on the Boston test set; the image inputs are truncated to the same
# number of rows and only the third output is kept
n_boston_test_rows = boston_x_test.shape[0]
boston_preds = model.predict(
    [digit_x_test[:n_boston_test_rows], fashion_x_test[:n_boston_test_rows], boston_x_test]
)[2]

# Turn class probabilities into predicted class indices
digit_preds = digit_probs.argmax(axis = 1)
fashion_preds = fashion_probs.argmax(axis = 1)

# Accuracy = number of matching labels divided by the number of test labels
digit_truth = digit_y_test.flatten()
fashion_truth = fashion_y_test.flatten()
digit_accuracy = (digit_preds.flatten() == digit_truth).sum() / digit_truth.shape[0]
fashion_accuracy = (fashion_preds.flatten() == fashion_truth).sum() / fashion_truth.shape[0]

# Mean absolute error, computed on the scaled target values
boston_mae = np.abs(boston_preds.flatten() - boston_y_test.flatten()).mean()

print(f'Multitask Digit Accuracy: {digit_accuracy}')
print(f'Multitask Fashion Accuracy: {fashion_accuracy}')
print(f'Multitask Boston MAE: {boston_mae}')