# Model Source

This notebook experiments with re-implementing the Keras model's training source code, inserting various event handlers (hooks) so that it can be extended to n-task learning.

In [1]:
%load_ext tensorboard

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Layer
from tensorflow.keras import Model

from collections import defaultdict
import datetime
import math
import numpy as np
import random
import time
import os

from utils import idx_load

In [3]:
def copy_metric(metric):
    """Build a new metric instance from an existing one via a serialize/deserialize round trip"""
    serialized = tf.keras.metrics.serialize(metric)
    return tf.keras.metrics.deserialize(serialized)


def hrr(length, normalized=True):
    """
    Create a new HRR vector using Tensorflow tensors.

    Args:
        length: Number of elements in the vector (coerced with `int`).
        normalized: When True, the vector is the real inverse FFT of a spectrum whose
            entries are 1 or `exp(+/-1j*phase)` with uniformly random phases, mirrored
            so the spectrum is conjugate-symmetric. When False, the vector is sampled
            from a normal distribution with stddev `1/sqrt(length)`.
    Return:
        A real-valued 1-D tensor with `length` elements.
    """
    length = int(length)

    # Unconstrained variant: plain Gaussian samples
    if not normalized:
        return tf.random.normal(
            shape=(length,),
            mean=0.0,
            stddev=1.0/tf.sqrt(float(length)),
            dtype=tf.dtypes.float32,
            seed=100,
            name=None,
        )

    # Number of free phases; the remaining spectrum entries are fixed or mirrored
    num_phases = int((length-1)/2)
    phases = tf.random.uniform(
        shape=(num_phases,),
        minval=-np.pi,
        maxval=np.pi,
        dtype=tf.dtypes.float32,
        seed=100,
        name=None,
    )
    phases = tf.cast(phases, tf.complex64)

    unit = tf.ones(1, dtype="complex64")
    forward = tf.exp(1j*phases)
    mirrored = tf.exp(-1j*phases[::-1])

    if length % 2:
        spectrum = [unit, forward, mirrored]
    else:
        # Even lengths carry one extra fixed entry in the middle of the spectrum
        spectrum = [unit, forward, unit, mirrored]

    return tf.math.real(tf.signal.ifft(tf.concat(spectrum, axis=0)))


def hrrs(length, n=1, normalized=True):
    """
    Create n new HRR vectors using Tensorflow tensors.

    Args:
        length: Number of elements in each HRR vector.
        n: How many vectors to create.
        normalized: Forwarded to `hrr`.
    Return:
        A 2-D float32 tensor with one HRR vector per row. When `n` is zero (or
        negative) an empty tensor of shape `(0, length)` is returned so that the
        result can still be concatenated with other HRR matrices along axis 0.
    """
    if n <= 0:
        # `tf.stack([])` would produce a 1-D empty tensor, which cannot be appended
        # to an existing (tasks, length) matrix; keep the rank consistent instead.
        return tf.zeros((0, int(length)), dtype=tf.dtypes.float32)
    return tf.stack([hrr(length, normalized) for _ in range(n)], axis=0)


def circ_conv(x, y):
    """
    Calculate the circular convolution between two HRR vectors.

    Both operands are cast to complex64, multiplied element-wise in the
    frequency domain, and the real part of the inverse FFT is returned.
    """
    x_freq = tf.signal.fft(tf.cast(x, tf.complex64))
    y_freq = tf.signal.fft(tf.cast(y, tf.complex64))
    return tf.math.real(tf.signal.ifft(x_freq*y_freq))


def logmod(x):
    """Sign-preserving logarithm: returns sign(x) * log(|x| + 1)"""
    magnitude = abs(x)
    sign = np.sign(x)
    return sign*np.log(magnitude + 1)
    
    
def plot(title, labels, *frameGroups):
    """
    Plot one or more groups of per-epoch values on a new figure.

    Args:
        title: Title of the plot.
        labels: Sequence of legend labels, one per frame group (may be None/empty).
        *frameGroups: The data series to draw; see `plotFrames`.
    """
    # The notebook's import cell never imports matplotlib, so `plt` was undefined
    # on a fresh kernel. Import it here so this helper is self-contained.
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    plotFrames(ax, title, labels, *frameGroups, xlabel="Epoch", ylabel="Value")
    ax.grid()
    # Only draw a legend when there is something to label (avoids an empty legend warning)
    if labels:
        ax.legend()
    
    
def plotFrames(ax, title, labels, *frameGroups, xlabel=None, ylabel=None):
    """
    Draw each frame group as a line on the given axes.

    Args:
        ax: Axes-like object providing `plot(x, y, label=...)` and `set(...)`.
        title: Title applied to the axes.
        labels: Sequence of legend labels indexed like `frameGroups`; may be
            None/empty or shorter than `frameGroups`, in which case the extra
            groups are drawn without a label.
        *frameGroups: Each group is either a dict mapping x-position -> value
            (plotted at its own keys) or a plain sequence of values (plotted
            at positions 0..len-1).
        xlabel: Optional x-axis label.
        ylabel: Optional y-axis label.
    """
    for i, frames in enumerate(frameGroups):
        if isinstance(frames, dict):
            # Use the actual keys so sparse or offset epochs line up with their values
            xs = list(frames.keys())
            ys = list(frames.values())
        else:
            # Plain sequences have no keys; index positions are the x values
            ys = list(frames)
            xs = list(range(len(ys)))
        label = labels[i] if labels and i < len(labels) else None
        ax.plot(xs, ys, label=label)
    ax.set(xlabel=xlabel, ylabel=ylabel, title=title)

In [105]:
class Context(Layer):
    """
    Layer that binds its input to the HRR vector of the active ("hot") task context.

    One HRR vector per task is stored as a non-trainable weight. `call` circularly
    convolves the input with the vector selected by `hot_context`. The layer also
    accumulates a per-epoch context loss in `context_loss`.
    """
    
    RESULT_UPDATED  = 0 # The ATR model was updated successfully
    RESULT_SWITCHED = 1 # A task switch was triggered in the ATR
    RESULT_ADDED    = 2 # A new task was added to the ATR model
    
    def __init__(self, num_tasks=1, **kwargs):
        """
        Args:
            num_tasks: Number of task contexts (HRR vectors) to allocate.
            **kwargs: Standard `Layer` keyword arguments (e.g. `name`), forwarded unchanged.
        """
        super(Context, self).__init__(**kwargs)
        
        # Populated in `build` once the input width is known
        self.kernel = None
        
        # Information Tracking
        self.num_tasks = num_tasks
        self._hot_context = tf.Variable(0, name="Hot_Context", trainable=False, dtype=tf.int32)
        self.context_loss = tf.Variable(0.0, name="Context_Loss", trainable=False, dtype=float)
        
        
    def _setup_hrr_weights(self):
        """(Re)create the HRR weight matrix, retaining previously generated vectors"""
        # Create the HRR initializer. This will create the list of HRR vectors
        if self.kernel is None:
            initializer = lambda shape, dtype=None: hrrs(self._input_shape, n=self.num_tasks)
        else:
            # If there are previously generated HRRs, they should be retained
            kernel_arr = self.kernel.numpy()
            retained = kernel_arr[:self.num_tasks]
            num_hrrs = max(0, self.num_tasks - len(kernel_arr))
            if num_hrrs > 0:
                initializer = lambda shape, dtype=None: np.append(retained, hrrs(self._input_shape, n=num_hrrs), axis=0)
            else:
                # Nothing new to generate. Appending an empty HRR batch would fail
                # because its rank does not match the retained matrix.
                initializer = lambda shape, dtype=None: retained
        
        # Create the weights for the layer.
        # The weights in this layer are generated HRR vectors, and are never updated.
        self.kernel = self.add_weight(name="context", shape=[self.num_tasks, self._input_shape], initializer=initializer, trainable=False)
        
    
    def build(self, input_shape):
        """Record the input width and create the HRR weights"""
        
        # Store the input shape since weights can be rebuilt later
        self._input_shape = int(input_shape[-1])
        
        # Build the n-task information
        self._setup_hrr_weights()
        
        
    def call(self, inputs):
        """
        Calculate the output for this layer.
        
        This layer convolves the input values with the context HRR vector
        to produce the output tensor.
        """
        # NOTE(review): debug tracing; this prints on every executed call and floods
        # the notebook output during training. Consider removing once verified.
        tf.print("Executing on task:", self.hot_context)
        
        # Fetch the hot context's HRR vector
        context_hrr = self.kernel[self.hot_context]
        
        # Return the resulting convolution between the inputs and the context HRR
        return circ_conv(inputs, context_hrr)
    
    
    def update_and_switch(self, dynamic_switch=True, verbose=1):
        """
        Update ATR values and switch contexts if necessary.
        
        Returns one of the `RESULT_*` codes defined on this class. The switching
        mechanism is not implemented yet, so this currently always reports
        `Context.RESULT_UPDATED` (no switch occurred).
        """
        # Update the ATR values. If a task switch occurs, check if a task was added...
        
        # No task switched occurred, updated successfully
        return Context.RESULT_UPDATED
        
    
    def clear_context_loss(self):
        """Clear the context loss for the current epoch"""
        self.context_loss.assign(0.0)
    
    
    def add_context_loss(self, context_delta):
        """Accumulate context loss: the mean squared error of `context_delta` against zeros"""
        context_loss = tf.keras.losses.mean_squared_error(np.zeros(len(context_delta)), context_delta)
        self.context_loss.assign_add(context_loss)
        
    
    @property
    def hot_context(self):
        """Get the active context index"""
        return self._hot_context.value()
    
    
    @hot_context.setter
    def hot_context(self, hot_context):
        """Set the active context index; the accumulated context loss is reset as well"""
        self._hot_context.assign(hot_context)
        self.context_loss.assign(0.0)

## Extended Model

The model below serves as a new base model for NTask

In [5]:
from collections import deque
import copy

from tensorflow.python.keras.mixed_precision.experimental import loss_scale_optimizer as lso
from tensorflow.python.data.experimental.ops import distribute_options
from tensorflow.python.data.ops import dataset_ops

from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.profiler import traceme

from tensorflow.python.distribute import distribution_strategy_context as ds_context
from tensorflow.python.distribute import parameter_server_strategy
from tensorflow.python.keras import backend
from tensorflow.python.keras import callbacks as callbacks_module
from tensorflow.python.keras.utils import version_utils
from tensorflow.python.keras.engine import training_utils
from tensorflow.python.keras.engine import data_adapter
from tensorflow.python.keras.engine import training
from tensorflow.python.util import nest

In [6]:
# Borrowed from https://github.com/tensorflow/tensorflow/blob/v2.2.0/tensorflow/python/keras/engine/data_adapter.py
try:
    import pandas as pd  # pylint: disable=g-import-not-at-top
except ImportError:
    pd = None

In [7]:
def _minimize(strategy, tape, optimizer, loss, trainable_variables):
    """Minimizes loss for one step by updating `trainable_variables`.
    This is roughly equivalent to
    ```python
    gradients = tape.gradient(loss, trainable_variables)
    self.optimizer.apply_gradients(zip(gradients, trainable_variables))
    ```
    However, this function also applies gradient clipping and loss scaling if the
    optimizer is a LossScaleOptimizer.
    Args:
      strategy: `tf.distribute.Strategy`.
      tape: A gradient tape. The loss must have been computed under this tape.
      optimizer: The optimizer used to minimize the loss.
      loss: The loss tensor.
      trainable_variables: The variables that will be updated in order to minimize
        the loss.
    Return:
      The gradients that were applied, i.e. after optional aggregation,
      loss-scale unscaling and clipping. Unlike the Keras helper this was
      borrowed from, they are returned so the caller can derive the context
      loss from them.
    """

    # Re-enter the tape so that the loss scaling (if any) is recorded on it
    with tape:
        if isinstance(optimizer, lso.LossScaleOptimizer):
            loss = optimizer.get_scaled_loss(loss)

    gradients = tape.gradient(loss, trainable_variables)

    # Whether to aggregate gradients outside of optimizer. This requires support
    # of the optimizer and doesn't work with ParameterServerStrategy and
    # CentralStorageStrategy.
    aggregate_grads_outside_optimizer = (
        optimizer._HAS_AGGREGATE_GRAD and  # pylint: disable=protected-access
        not isinstance(strategy.extended,
                       parameter_server_strategy.ParameterServerStrategyExtended))

    if aggregate_grads_outside_optimizer:
        # We aggregate gradients before unscaling them, in case a subclass of
        # LossScaleOptimizer all-reduces in fp16. All-reducing in fp16 can only be
        # done on scaled gradients, not unscaled gradients, for numeric stability.
        gradients = optimizer._aggregate_gradients(zip(gradients,  # pylint: disable=protected-access
                                                       trainable_variables))
    if isinstance(optimizer, lso.LossScaleOptimizer):
        gradients = optimizer.get_unscaled_gradients(gradients)
    gradients = optimizer._clip_gradients(gradients)  # pylint: disable=protected-access
    # Skip the optimizer update entirely when there is nothing to train
    if trainable_variables:
        if aggregate_grads_outside_optimizer:
            # Gradients were already aggregated above; tell the optimizer not to repeat it
            optimizer.apply_gradients(
                zip(gradients, trainable_variables),
                experimental_aggregate_gradients=False)
        else:
            optimizer.apply_gradients(zip(gradients, trainable_variables))
    return gradients

In [8]:
# Extended from https://github.com/tensorflow/tensorflow/blob/v2.2.0/tensorflow/python/keras/engine/data_adapter.py
class WindowedDataHandler(data_adapter.DataHandler):
    """
    Enumerating over this data handler yields windows of the dataset.
    This is important for n-task because if a context switch occurs
    during an epoch the data needs to be sent back through the network.
    """
    def calc_window_size(self):
        """
        Calculate how many dataset elements (batches) make up one window.

        Return:
            [int]: `ceil(num_samples / min(batch_size, num_samples))`, where
            `num_samples` is derived from the inferred step count and corrected
            for a trailing partial batch.
        """
        batch_size = self._adapter.batch_size()
        num_samples = self._inferred_steps*batch_size
        if self._adapter.has_partial_batch():
            num_samples -= batch_size - self._adapter.partial_batch_size()
        # `Dataset.window` takes an integer size; `np.ceil` alone yields a numpy float
        return int(np.ceil(num_samples/min(batch_size, num_samples)))
    
    def enumerate_epochs(self):
        """Yield `(epoch, window_iterator)` pairs, where the iterator produces windows of the dataset"""
        window_size = self.calc_window_size()
        data_iterator = iter(self._dataset.window(window_size))
        for epoch in range(self._initial_epoch, self._epochs):
            if self._insufficient_data:
                break
            if self._adapter.should_recreate_iterator():
                data_iterator = iter(self._dataset.window(window_size))
            yield epoch, data_iterator
            self._adapter.on_epoch_end()

In [9]:
# Extended from https://github.com/tensorflow/tensorflow/blob/v2.2.0/tensorflow/python/keras/engine/training.py
class NTaskModelBase(Model):
    """
    This abstract model integrates the raw mechanisms and handlers into
    Tensorflow Keras' model class. These mechanisms can be implemented by
    inheriting from this class.

    Hooks that subclasses may override (the defaults here do nothing):
        - `initialize_epoch(epoch)`: called before every pass over an epoch's data
        - `add_context_loss(gradients)`: called after every training step
        - `update_and_switch(dynamic_switch, verbose)`: called after every pass;
          returning False makes `fit` restore the weights and repeat the epoch
    """
    
    def __init__(self, *args, **kwargs):
        super(NTaskModelBase, self).__init__(*args, **kwargs)
        self.accumulate_gradients = False
        self.accumulated_gradients = None
        
        
    def compile(self, *args, accumulate_gradients=False, **kwargs):
        """Compile the model; `accumulate_gradients` enables the (unfinished) accumulation mode"""
        super(NTaskModelBase, self).compile(*args, **kwargs)
        
        # TODO
        if accumulate_gradients:
            self.accumulate_gradients = True
        
    
    def train_step(self, data):
        """Run one optimization step and pass the resulting gradients to `add_context_loss`"""
        data = data_adapter.expand_1d(data)
        x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)

        with backprop.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, sample_weight, regularization_losses=self.losses)
            
        gradients = _minimize(self.distribute_strategy, tape, self.optimizer, loss,
              self.trainable_variables)
        
        # Add context loss to layers
        self.add_context_loss(gradients)

        self.compiled_metrics.update_state(y, y_pred, sample_weight)
        return {m.name: m.result() for m in self.metrics}
    
    
    @training.enable_multi_worker
    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            dynamic_switch=True,
            callbacks=None,
            validation_split=0.,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            validation_batch_size=None,
            validation_freq=1,
            max_queue_size=10,
            workers=1,
            use_multiprocessing=False):
        """
        Train the model, repeating an epoch whenever a context switch is reported.

        The arguments mirror the Keras `Model.fit` implementation this method was
        extended from, plus:
            dynamic_switch [bool]: Forwarded to `update_and_switch` after every
                pass over the epoch's data.
        Before each epoch the trainable weights are snapshotted. If
        `update_and_switch` reports a switch, the snapshot is restored, the
        metrics are reset and the same window of data is run again.
        Return:
            The `History` object (`self.history`).
        """
        
        tf.print("Hot contexts:", [layer.hot_context for layer in self.layers if isinstance(layer, Context)])

        training._keras_api_gauge.get_cell('fit').set(True)
        # Legacy graph support is contained in `training_v1.Model`.
        version_utils.disallow_legacy_graph('Model', 'fit')
        self._assert_compile_was_called()
        self._check_call_args('fit')

        if validation_split:
            # Create the validation data using the training data. Only supported for
            # `Tensor` and `NumPy` input.
            (x, y, sample_weight), validation_data = (
            data_adapter.train_validation_split((x, y, sample_weight),
                                                validation_split=validation_split,
                                                shuffle=False))

        with self.distribute_strategy.scope(), training_utils.RespectCompiledTrainableState(self):
            # Creates a `tf.data.Dataset` and handles batch and epoch iteration.
            data_handler = WindowedDataHandler(
                x=x,
                y=y,
                sample_weight=sample_weight,
                batch_size=batch_size,
                steps_per_epoch=steps_per_epoch,
                initial_epoch=initial_epoch,
                epochs=epochs,
                shuffle=shuffle,
                class_weight=class_weight,
                max_queue_size=max_queue_size,
                workers=workers,
                use_multiprocessing=use_multiprocessing,
                model=self)

            # Container that configures and calls `tf.keras.Callback`s.
            if not isinstance(callbacks, callbacks_module.CallbackList):
                callbacks = callbacks_module.CallbackList(
                    callbacks,
                    add_history=True,
                    add_progbar=verbose != 0,
                    model=self,
                    verbose=verbose,
                    epochs=epochs,
                    steps=data_handler.inferred_steps)

            self.stop_training = False
            train_function = self.make_train_function()
            callbacks.on_train_begin()
            # Handle fault-tolerance for multi-worker.
            # TODO(omalleyt): Fix the ordering issues that mean this has to
            # happen after `callbacks.on_train_begin`.
            data_handler._initial_epoch = (self._maybe_load_initial_epoch_from_ckpt(initial_epoch))
            # Stays None until at least one training step has completed
            logs = None
            for epoch, window_iterator in data_handler.enumerate_epochs():
                self.reset_metrics()
                callbacks.on_epoch_begin(epoch)
                dataset = tf.data.Dataset.zip(next(window_iterator))
                switched = True
                # Snapshot of the weights so a context switch can rewind the epoch
                weights = backend.batch_get_value(self.trainable_variables)
                while switched:
                    self.initialize_epoch(epoch)
                    iterator = iter(dataset)
                    with data_handler.catch_stop_iteration():
                        for step in data_handler.steps():
                            with traceme.TraceMe( 'TraceContext', graph_type='train', epoch_num=epoch, step_num=step, batch_size=batch_size):
                                callbacks.on_train_batch_begin(step)
                                tmp_logs = train_function(iterator)
                                # Catch OutOfRangeError for Datasets of unknown size.
                                # This blocks until the batch has finished executing.
                                # TODO(b/150292341): Allow multiple async steps here.
                                if not data_handler.inferred_steps:
                                    context.async_wait()
                                logs = tmp_logs  # No error, now safe to assign to logs.
                                callbacks.on_train_batch_end(step, logs)
                    # This must run outside `catch_stop_iteration`: when that context
                    # manager swallows an end-of-data error the rest of its body is
                    # skipped, which would leave `switched` True and loop forever.
                    switched = not self.update_and_switch(dynamic_switch, verbose)
                    # If a switch occurred, we need to restore the weights
                    if switched:
                        backend.batch_set_value(zip(self.trainable_variables, weights))
                        self.reset_metrics()
                    
                if logs is None:
                    # No training step ever ran, so there is nothing to report
                    raise ValueError('Expect x to be a non-empty array or dataset.')
                epoch_logs = copy.copy(logs)
                
                # Gradient accumulation is still a TODO; only apply when something was accumulated
                if self.accumulate_gradients and self.accumulated_gradients is not None:
                    self.optimizer.apply_gradients(zip(self.accumulated_gradients, self.trainable_variables))

                # Run validation.
                if validation_data and self._should_eval(epoch, validation_freq):
                    val_x, val_y, val_sample_weight = (
                        data_adapter.unpack_x_y_sample_weight(validation_data))
                    val_logs = self.evaluate(
                        x=val_x,
                        y=val_y,
                        sample_weight=val_sample_weight,
                        batch_size=validation_batch_size or batch_size,
                        steps=validation_steps,
                        callbacks=callbacks,
                        max_queue_size=max_queue_size,
                        workers=workers,
                        use_multiprocessing=use_multiprocessing,
                        return_dict=True)
                    val_logs = {'val_' + name: val for name, val in val_logs.items()}
                    epoch_logs.update(val_logs)

                callbacks.on_epoch_end(epoch, epoch_logs)
                if self.stop_training:
                    break

            callbacks.on_train_end()
            return self.history
        
    def add_context_loss(self, gradients):
        """
        Calculate and add context loss to context layers.
        
        Hook for subclasses; the base implementation does nothing.
        """
        return
        
        
    def initialize_epoch(self, epoch):
        """
        Reset context loss in context layers.
        
        Hook for subclasses; the base implementation does nothing.
        """
        return
        
        
    def update_and_switch(self, dynamic_switch=True, verbose=0):
        """
        Update the context layers
        
        Hook for subclasses; the base implementation never switches.
        
        Args:
            dynamic_switch [bool]: Enable/disable dynamic switching mechanisms
        Return:
            [bool]: Indicate if no switches occurred
        """
        return True

In [10]:
class NTaskModel(NTaskModelBase):
    """
    N-task model: implements the `NTaskModelBase` hooks by delegating to the
    `Context` layers found in the model.
    """
    
    def __init__(self, *args, **kwargs):
        super(NTaskModel, self).__init__(*args, **kwargs)
        
        # Indices (into `self.layers`) of every Context layer
        self.ctx_layers = [i for i, layer in enumerate(self.layers) if isinstance(layer, Context)]
        
        # We need to map the context layer to their gradient indices
        self.ctx_gradient_map = {}
        index = 0 # Number of trainable variables belonging to the layers seen so far
        for i, layer in enumerate(self.layers):
            if isinstance(layer, Context):
                self.ctx_gradient_map[i] = index + 1 # The bias gradient
            index += len(layer.trainable_variables)
    
    
    def _calc_context_loss(self, ctx_layer_idx, gradients):
        """
        Calculate the delta at the given context layer from the gradients of the layer after it.
        
        IMPORTANT: 
        1) Assumes no use of activation function on Ntask layer
        2) Assumes that the layer following the Ntask layer:
            a) Is a Dense layer
            b) Is using bias 
               — ex: Dense(20, ... , use_bias=True) 
               — note Keras Dense layer uses bias by default if no value is given for use_bias param
        3) Assumes index of the next layer's gradient is known within the gradients list returned from gradient tape in a tape.gradient call
        4) If the above points aren't met, things will break and it may be hard to locate the bugs
        
        Args:
            ctx_layer_idx [int]: Index of the context layer within `self.layers`
            gradients: Gradients ordered like `self.trainable_variables`
        Return:
            The delta at the context layer
        """
        # From the delta rule in neural network math
        index = self.ctx_gradient_map[ctx_layer_idx]
        delta_at_next_layer = gradients[index]
        transpose_of_weights_at_next_layer = tf.transpose(self.layers[ctx_layer_idx + 1].weights[0])
        
        # Calculate delta at n-task layer
        context_delta = tf.tensordot(delta_at_next_layer, transpose_of_weights_at_next_layer, 1)
        return context_delta
    
    
    def initialize_epoch(self, epoch):
        """Clear the accumulated context loss of every context layer"""
        # Clear context loss (probably going to use a new mechanism here)
        for i in self.ctx_layers:
            self.layers[i].clear_context_loss()
            
    
    def add_context_loss(self, gradients):
        """Accumulate the context loss of every context layer from this step's gradients"""
        for i in self.ctx_layers:
            self.layers[i].add_context_loss(self._calc_context_loss(i, gradients))
    
    
    def update_and_switch(self, dynamic_switch=True, verbose=0):
        """
        Update every context layer, visiting them from last to first.
        
        The defaults match `NTaskModelBase.update_and_switch` so both can be
        called the same way.
        
        Args:
            dynamic_switch [bool]: Enable/disable dynamic switching mechanisms
            verbose: Forwarded to each layer's `update_and_switch`
        Return:
            [bool]: Indicate if no switches occurred
        """
        switched = False
        for i in reversed(self.ctx_layers):
            layer = self.layers[i]
            result = layer.update_and_switch(dynamic_switch=dynamic_switch, verbose=verbose)
            if result & Context.RESULT_SWITCHED:
                switched = True
        return not switched
    

    def set_contexts(self, contexts):
        """
        Set the active context of every context layer.
        
        Args:
            contexts: Sequence with one context index per context layer, in layer order
        """
        for position, layer_idx in enumerate(self.ctx_layers):
            self.layers[layer_idx].hot_context = contexts[position]

___

In [11]:
class AtrLogger(tf.keras.callbacks.BaseLogger):
    """
    Callback that writes the ATR value of every task context to TensorBoard,
    using one summary writer per task.
    """
    
    def __init__(self, logdir, *args, **kwargs):
        """
        Args:
            logdir: Directory under which the per-task summary writers are created
        """
        super(AtrLogger, self).__init__(*args, **kwargs)
        self.logdir = logdir
        self.writers = {}
        
    def set_model(self, model):
        """Attach the model and start an empty writer list for each of its context layers"""
        super(AtrLogger, self).set_model(model)
        # Models without context layers simply have nothing to log
        ctx_layers = getattr(self.model, "ctx_layers", ())
        self.writers = {self.model.layers[i]: [] for i in ctx_layers}
        
    def on_epoch_end(self, epoch, logs=None):
        """Create the correct number of writers for the task if necessary"""
        for layer, writers in self.writers.items():
            # The context layer may not carry an ATR model (the `Context` layer in
            # this notebook does not define one); skip it rather than crash training.
            atr_model = getattr(layer, "atr_model", None)
            if atr_model is None:
                continue
            for i in range(len(writers), layer.num_tasks):
                writers.append(tf.summary.create_file_writer(os.path.join(self.logdir, f"context_atr_{i}"))) # TODO Fix this name here...
            plot_tag = "context_atr_trace"                                                                   # TODO Fix this name here too...
            for i, writer in enumerate(writers):
                with writer.as_default():
                    value = atr_model.atr_values[i]
                    if value is not None:
                        tf.summary.scalar(plot_tag, data=value, step=epoch)
    
#     def on_epoch_end(self, epoch, logs=None):
#         self._update_writers()
#         v = np.random.random()
#         for i, writer in enumerate(self.writers):
#             with writer.as_default():
#                 tf.summary.scalar("atr_traces", data=v+i, step=epoch)

# Test

In [12]:
def test(ModelClass, init_args, compile_args, x_train, y_train, seed=5, **kwargs):
    """
    Train a small binary classifier built with `ModelClass` and print its training accuracy.

    Args:
        ModelClass: Model class instantiated with `inputs=`/`outputs=` plus `init_args`
        init_args [dict]: Extra keyword arguments for the model constructor
        compile_args [dict]: Extra keyword arguments for `compile`
        x_train: Training inputs
        y_train: Training targets
        seed [int]: Seed applied to `random`, NumPy and TensorFlow
        **kwargs: Forwarded to `fit`
    """
    # Seed every library in use so runs are repeatable
    for set_seed in (random.seed, np.random.seed, tf.random.set_seed):
        set_seed(seed)
    
    # Input -> Dense(128, relu) -> Dense(1, sigmoid)
    inputs = Input(x_train[0].shape)
    hidden = Dense(128, activation="relu")(inputs)
    outputs = Dense(1, activation="sigmoid")(hidden)
    model = ModelClass(inputs=inputs, outputs=outputs, **init_args)
    
    # Compile and train
    loss_fn = tf.keras.losses.BinaryCrossentropy()
    sgd = tf.keras.optimizers.SGD(1e-4)
    model.compile(loss=loss_fn, optimizer=sgd, **compile_args)
    model.fit(x_train, y_train, **kwargs)
    
    # Count the rounded predictions that agree with the targets
    predicted = np.round(model(x_train)).astype(int).flatten()
    expected = y_train.flatten()
    result = (predicted == expected).sum()
    print(f"{result}/{len(y_train)}; Accuracy: {100*result/len(y_train):.2f}%")

In [25]:
def test_context(ModelClass, init_args, compile_args, x_train, y_train_list, cycles=1, seed=5, epochs=1, **kwargs):
    """
    Train a model with a `Context` layer on several tasks that share the same inputs.

    Each entry of `y_train_list` is one task. For every cycle the model is
    switched to each task's context in turn and trained for `epochs` epochs
    with dynamic switching disabled.

    Args:
        ModelClass: Model class instantiated with `inputs=`/`outputs=` plus `init_args`;
            must provide `set_contexts`
        init_args [dict]: Extra keyword arguments for the model constructor
        compile_args [dict]: Extra keyword arguments for `compile`
        x_train: Training inputs shared by every task
        y_train_list: Non-empty sequence with the training targets of each task
        cycles [int]: How many times to run through all of the tasks
        seed [int]: Seed applied to `random`, NumPy and TensorFlow
        epochs [int]: Epochs to train per task, per cycle
        **kwargs: Forwarded to `fit`
    Return:
        The trained model, left in the context of the last task
    Raises:
        ValueError: If `y_train_list` is empty
    """
    num_tasks = len(y_train_list)
    if num_tasks == 0:
        raise ValueError("`y_train_list` must contain at least one task")
    
    # Set the random seed for all used libraries
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
    # Create the model
    inp = Input(x_train[0].shape)
    x = Dense(128, activation="relu", use_bias=True)(inp)
    # Allocate a context per task. Never fewer than the two that used to be
    # hard-coded, so results for one or two tasks are unchanged.
    x = Context(num_tasks=max(2, num_tasks))(x)
    x = Dense(1, activation="sigmoid", use_bias=True)(x)
    model = ModelClass(inputs=inp, outputs=x, **init_args)
    
    # Compile the model
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.SGD(1e-1),
        **compile_args
    )
    
    logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    callbacks = [
        tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1),
#         AtrLogger(logdir)
    ]
    
    # Train the model
    # (the loop variable is `task`, not `context`, to avoid shadowing the imported `context` module)
    for cycle in range(cycles):
        for task, y_train in enumerate(y_train_list):
            # Keep the epoch counter increasing across tasks and cycles
            initial_epoch = cycle*num_tasks*epochs + task*epochs
            model.set_contexts([task])
            model.fit(x_train, y_train, callbacks=callbacks, dynamic_switch=False, initial_epoch=initial_epoch, epochs=initial_epoch + epochs, **kwargs)
    
    for task in range(num_tasks):
        model.set_contexts([task])
        tf.print(model.predict(x_train))
    
    # Calculate and display the accuracy.
    # The model is still in the last task's context, so compare against that task's targets.
    y_train = y_train_list[-1]
    result = (np.round(model(x_train)).astype(int).flatten() == y_train.flatten()).sum()
    print(f"{result}/{len(y_train)}; Accuracy: {100*result/len(y_train):.2f}%")
    
    return model

### Dataset

In [16]:
# Training images
training_images = idx_load("../datasets/mnist/train-images.idx3-ubyte")
training_images.shape

(60000, 28, 28)

In [17]:
# Training labels
training_labels = idx_load("../datasets/mnist/train-labels.idx1-ubyte")
training_labels.shape

(60000,)

In [18]:
# Normalize the datasets: flatten each 28x28 image to a 784-vector and divide by 255.
# Guarded on the array rank so that re-running this cell does not divide a second time.
if training_images.ndim == 3:
    training_images = training_images.reshape(len(training_images), 28*28) / 255.0

In [14]:
# Target outputs for eight two-input boolean tasks. Each row lists the four
# expected outputs in the same order as the rows of `logic_gate_inputs`.
logic_gate_labels = np.array([
    [[0], [1], [1], [0]], # XOR
    [[1], [0], [0], [1]], # XNOR
    [[0], [0], [0], [1]], # AND
    [[0], [1], [1], [1]], # OR
    [[1], [0], [0], [0]], # NOR
    [[1], [1], [1], [0]], # NAND
    [[1], [0], [1], [0]], # Custom 1
    [[0], [1], [0], [1]]  # Custom 2
])

# The four input combinations; -1 stands for "false" and 1 for "true"
logic_gate_inputs = np.array([[-1, -1], [-1, 1], [1, -1], [1, 1]])

___

### Model Evaluation

In [17]:
# Binary task: the target is 1 when the MNIST digit is even, otherwise 0
x_train = training_images
y_train = (training_labels % 2 == 0).astype(int)

In [18]:
# Verify on the first 10 the dataset seems correct...
print(training_labels[:10])
print(y_train[:10])

[5 0 4 1 9 2 1 3 1 4]
[0 1 1 0 0 1 0 0 0 1]


In [20]:
# Baseline: plain Keras `Model`, timed by hand.
# `test` takes (ModelClass, init_args, compile_args, x_train, y_train, ...); the two
# argument dicts were missing from this call.
s = time.time()
test(Model, {}, {}, x_train, y_train, epochs=10, batch_size=64, verbose=1)
print(time.time() - s)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
48394/60000; Accuracy: 80.66%
13.526658296585083


In [23]:
%time test(NTaskModel, {}, {'accumulate_gradients': False}, x_train, y_train, epochs=10, batch_size=64, verbose=1)

DictWrapper({})
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
48394/60000; Accuracy: 80.66%
CPU times: user 28.2 s, sys: 16.2 s, total: 44.4 s
Wall time: 13.8 s


In [243]:
%time test(NTaskModelBase, {}, {}, logic_gate_inputs, logic_gate_labels[0], epochs=500, batch_size=1, verbose=0)

2/4; Accuracy: 50.00%
CPU times: user 3.06 s, sys: 406 ms, total: 3.47 s
Wall time: 2.65 s


In [79]:
%time test(NTaskModel, {}, {}, logic_gate_inputs, logic_gate_labels[0], epochs=1, batch_size=4, verbose=1)

DictWrapper({})
2/4; Accuracy: 50.00%
CPU times: user 250 ms, sys: 15.6 ms, total: 266 ms
Wall time: 243 ms


In [106]:
%time model = test_context(NTaskModel, {}, {"metrics": [tf.keras.metrics.BinaryAccuracy()]}, logic_gate_inputs, logic_gate_labels[:1], cycles=1, epochs=20, batch_size=1, verbose=0)

Hot contexts: [0]
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on task: 0
Executing on tas

In [109]:
model.set_contexts([1])

In [110]:
model.predict(logic_gate_inputs)

Executing on task: 1


array([[0.4808417 ],
       [0.5394176 ],
       [0.7024284 ],
       [0.33326566]], dtype=float32)

In [33]:
# NOTE(review): this cell killed a hard-coded PID from a past session (presumably a
# stale TensorBoard server, to be confirmed). On a re-run that PID may belong to an
# unrelated process, so the command is disabled; look up the current PID first.
# !kill 28162

In [None]:
%tensorboard --logdir logs

In [64]:
v = tf.Variable(1, trainable=False, dtype=tf.int32)

In [65]:
v.numpy()

1

In [67]:
v.value()

<tf.Tensor: shape=(), dtype=int32, numpy=1>

In [62]:
a = [1, 2, 3]

In [69]:
# Demonstration: a Python list cannot be indexed with a `tf.Variable` directly.
# The error is caught and shown so that "Run All" does not stop at this cell.
try:
    a[v]
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: list indices must be integers or slices, not ResourceVariable