# Using the MANN Package to Train a Fully Connected Neural Network

In this notebook, the MANN package will be used to train pruned fully connected neural networks.  We will train two single-task networks on two separate tasks and one multitask network which performs both tasks.

In [None]:
# Load the MANN package and TensorFlow
import tensorflow as tf
import beyondml.tflow as mann

# Load the make_classification function from scikit-learn
from sklearn.datasets import make_classification

In [None]:
# Generate one synthetic dataset per task. Both share the same sample count
# and use a single cluster per class, so those arguments are factored out.
shared_options = dict(n_samples=10000, n_clusters_per_class=1)

# Task 1: binary classification on 10 features (8 informative)
x1, y1 = make_classification(
    n_features=10,
    n_informative=8,
    n_classes=2,
    **shared_options,
)

# Task 2: 10-class classification on 20 features (13 informative)
x2, y2 = make_classification(
    n_features=20,
    n_informative=13,
    n_classes=10,
    **shared_options,
)

# Reshape both label arrays into single-column form, i.e. shape (n_samples, 1)
y1, y2 = y1.reshape(-1, 1), y2.reshape(-1, 1)

# Early-stopping callback reused by every fit call below: training stops once
# the monitored quantity has not improved by at least `min_delta` for
# `patience` epochs, and the best weights seen are restored
callback = tf.keras.callbacks.EarlyStopping(
    min_delta=0.01,
    patience=3,
    restore_best_weights=True,
)

## Create the first model

This first model is a fully connected model which will perform the first task. It will be pruned utilizing the MANN package so that most of its weights are 0.

In [None]:
# After data generation, create the single-task model using the TensorFlow Keras Functional API.
# The input shape is passed as an explicit 1-tuple: a bare integer is not a
# valid shape for every Keras version.
input_layer = tf.keras.Input(shape=(x1.shape[-1],))

# Instead of using keras Dense Layers, use MANN MaskedDense Layers so the
# weights can be masked (pruned) later. Six hidden layers of 100 units each.
x = input_layer
for _ in range(6):
    x = mann.layers.MaskedDense(
        100,
        activation='relu'
    )(x)

# Create the output layer as another MANN MaskedDense Layer; a single sigmoid
# unit matches the binary labels of the first task
output_layer = mann.layers.MaskedDense(1, activation='sigmoid')(x)

# Create the model
model = tf.keras.Model(input_layer, output_layer)

In [None]:
# The model has to be compiled before it can be masked, so give it the
# binary-classification loss, the accuracy metric and the Adam optimizer
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Prune the model with the MANN package. Arguments, in order:
#   model      - the model to be pruned
#   percentile - 90, meaning 90% of the weights will be masked
#   method     - how to choose weights to mask: 'gradients' or 'magnitude'
#   exclusive  - whether weight locations must be exclusive to each task
#   x, y       - input data and expected outputs (first 2000 samples only)
model = mann.utils.mask_model(
    model,
    90,
    method='gradients',
    exclusive=True,
    x=x1[:2000],
    y=y1[:2000],
)

# Masking returns a model that must be compiled again before training
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Inspect the effect of pruning: take the first MaskedDense layer of the model
# and display its kernel, which is the first array in the layer's weight list
first_masked_layer = model.layers[1]
first_masked_layer.get_weights()[0]

In [None]:
# Train the pruned model on the first dataset, holding out 20% of the samples
# for validation and stopping early via the shared callback
model.fit(
    x1,
    y1,
    epochs=100,
    batch_size=128,
    validation_split=0.2,
    callbacks=[callback],
)

# Threshold the sigmoid outputs at 0.5 to get class labels, then report the
# fraction of samples in x1 that are classified correctly
first_predictions = (model.predict(x1) >= 0.5).astype(int).flatten()
first_correct = (first_predictions == y1.flatten()).sum()
print(f'First Model Accuracy: {first_correct/y1.shape[0]}')

## Create the second model

This second model is a fully connected model which will perform the second task. It will be pruned utilizing the MANN package so that most of its weights are 0.

In [None]:
# Create the second model, a single-task network for the second dataset.
# The input shape is passed as an explicit 1-tuple: a bare integer is not a
# valid shape for every Keras version.
input_layer = tf.keras.Input(shape=(x2.shape[-1],))

# Instead of using keras Dense Layers, use MANN MaskedDense Layers so the
# weights can be masked (pruned) later. Six hidden layers of 100 units each.
x = input_layer
for _ in range(6):
    x = mann.layers.MaskedDense(
        100,
        activation='relu'
    )(x)

# Create the output layer as another MANN MaskedDense Layer; ten softmax units
# match the ten classes of the second task
output_layer = mann.layers.MaskedDense(10, activation='softmax')(x)

# Create the model
model = tf.keras.Model(input_layer, output_layer)

In [None]:
# Same compile -> mask -> recompile -> fit sequence as the first model, this
# time with the sparse categorical loss for the ten integer-coded classes
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Mask 90% of the weights, using the first 2000 samples of the second dataset
model = mann.utils.mask_model(
    model,
    90,
    method='gradients',
    exclusive=True,
    x=x2[:2000],
    y=y2[:2000].reshape(-1, 1),
)

# Recompile the masked model before training
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(
    x2,
    y2,
    batch_size=128,
    epochs=100,
    validation_split=0.2,
    callbacks=[callback],
)

# The predicted class is the index of the largest output; report the fraction
# of samples in x2 that are classified correctly
second_predictions = model.predict(x2).argmax(axis=1)
second_correct = (second_predictions == y2.flatten()).astype(int).sum()
print(f'Second Model Accuracy: {second_correct/y2.shape[0]}')

## Create the MANN

The third and final model we create here will be a multitask model (MANN) which performs both tasks.

In [None]:
# Train a Multitask Model

# One input per task. Shapes are passed as explicit 1-tuples: a bare integer
# is not a valid shape for every Keras version.
input1 = tf.keras.layers.Input(shape=(x1.shape[-1],))
input2 = tf.keras.layers.Input(shape=(x2.shape[-1],))

# Each task gets its own first MaskedDense layer, since the two datasets have
# different numbers of features
dense1 = mann.layers.MaskedDense(100, activation='relu')(input1)
dense2 = mann.layers.MaskedDense(100, activation='relu')(input2)

# Five MultiMaskedDense layers process both task branches together; the first
# one takes the list of per-task tensors
x = mann.layers.MultiMaskedDense(100, activation='relu')([dense1, dense2])
for _ in range(4):
    x = mann.layers.MultiMaskedDense(100, activation='relu')(x)

# Split the multitask output back into one tensor per task
# (SelectorLayer(i) is used here to pick the branch for task i)
sel1 = mann.layers.SelectorLayer(0)(x)
sel2 = mann.layers.SelectorLayer(1)(x)

# Task heads: a single sigmoid unit for the binary task, and ten softmax units
# for the 10-class task. Softmax (not sigmoid) is required so the outputs form
# a probability distribution for sparse_categorical_crossentropy, matching the
# single-task model for the second dataset.
output1 = mann.layers.MaskedDense(1, activation='sigmoid')(sel1)
output2 = mann.layers.MaskedDense(10, activation='softmax')(sel2)

# Compile with one loss per output so the model can be masked
model = tf.keras.Model([input1, input2], [output1, output2])
model.compile(
    loss=['binary_crossentropy', 'sparse_categorical_crossentropy'],
    metrics=['accuracy'],
    optimizer='adam'
)

# Mask 90% of the weights using the first 2000 samples of each task.
# y1 and y2 are already column vectors, so no further reshaping is needed.
model = mann.utils.mask_model(
    model,
    90,
    method='gradients',
    exclusive=True,
    x=[x1[:2000], x2[:2000]],
    y=[y1[:2000], y2[:2000]]
)

# Recompile the masked model before training
model.compile(
    loss=['binary_crossentropy', 'sparse_categorical_crossentropy'],
    metrics=['accuracy'],
    optimizer='adam'
)

# Train on both tasks at once, then collect one prediction array per task
model.fit([x1, x2], [y1, y2], epochs=100, batch_size=128, callbacks=[callback], validation_split=0.2)
p1, p2 = model.predict([x1, x2])

# Convert raw outputs to class labels: threshold at 0.5 for the binary task,
# argmax over the class axis for the 10-class task
p1 = (p1 >= 0.5).astype(int)
p2 = p2.argmax(axis=1)

## Predict Using the MANN

Now that the MANN model has been trained, we can use it to get predictions just as we would with a traditional model. In this case, a list of predictions is returned, with each index corresponding to a task.

In [None]:
# Count the correctly predicted samples for each task, then report each count
# as a fraction of that task's total number of labels
task1_hits = (p1.flatten() == y1.flatten()).sum()
task2_hits = (p2.flatten() == y2.flatten()).sum()
print(f'Multitask Task 1 Accuracy: {task1_hits/y1.flatten().shape[0]}')
print(f'Multitask Task 2 Accuracy: {task2_hits/y2.flatten().shape[0]}')