In [115]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Dense
import numpy as np

%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [4]:
(train_ds, test_ds), ds_info = tfds.load('mnist', split=['train', 'test'], as_supervised=True, with_info=True)

Metal device set to: Apple M1 Pro

systemMemory: 32.00 GB
maxCacheSize: 10.67 GB



2022-11-26 10:18:44.163389: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-11-26 10:18:44.163528: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [84]:
# 2.2 - set up data pipiline
def prepare_mnist_data(mnist):
    # map from uint8 to tf.float
    mnist = mnist.map(lambda img, target: (tf.cast(img, tf.float32), (tf.cast(target, tf.float32))))

    # flatten input
    mnist = mnist.map(lambda img, target: (tf.reshape(img, (-1, )), target))
    
    # normalize input to gaussian distribution or divide by 128
    mnist = mnist.map(lambda img, target: (((img/128)-1), target))

    # zip two images together
    #a = mnist.take(30000)
    #b = mnist.skip(30000).take(30000)
    #a, b = tf.split(mnist, 2, 1)
    #a = mnist.shard(num_shards=2, index=0)
    #b = mnist.shard(num_shards=2, index=1)
    mnist = tf.data.Dataset.zip((mnist.shuffle(2000), mnist.shuffle(2000)))
    mnist = mnist.map(lambda x1, x2: (x1[0], x2[0], x1[1]+x2[1]>=5))
    zipped_ds = mnist.map(lambda x1, x2, t: (x1,x2, tf.cast(t, tf.int32)))

    # create target for subtask 1

    # keep the progess in memory
    mnist = mnist.cache()
    mnist = mnist.shuffle(1000) 
    mnist = mnist.batch(32) # 32 image in one batch
    mnist = mnist.prefetch(20) # prepare 20 next datapoints 

    return mnist

In [86]:
train_dataset = train_ds.apply(prepare_mnist_data)
test_dataset = test_ds.apply(prepare_mnist_data)

In [113]:
# 2.3 - build network
class MyModel(tf.keras.Model):
    def __init__(self) -> None:
        super(MyModel, self).__init__()
        self.dense_input1 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        self.dense_input2 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        self.dense1 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        self.out = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)

    @tf.function
    def call(self, input1, input2):
        input1 = self.dense_input1(input1) # still to decide if this is the right data
        input2 = self.dense_input2(input2) 
        input3 = tf.math.add(input1, input2)
        print(input3.shape)
        x = self.dense1(input3)
        x = self.dense2(x)
        x = self.out(x)
        return x
        

In [110]:
# 2.4 - training the network
def train_step(model, input1, input2, target, loss_function, optimizer):

    # loss object and optimizer and are instances of respective tensorflow classes 
    with tf.GradientTape() as tape:
        prediction = model(input1, input2)
        loss = loss_function(target, prediction)
    
    gradients = tape.gradient(loss, model.trainable_variables) # all variables with trainable = True
    optimizer.apply_gradients(zip(gradients, model.trainable_variables)) # updating weights with optimizer
    return loss

# 2.4.1 - testing the model
def test(model, test_data, loss_function):

    test_accuracy_aggregator = []
    test_loss_aggregator = [] # continuous

    # input is batch of 32 examples
    for input1,input2, target in test_data:
        prediction = model(input1, input2)
        sample_test_loss = loss_function(target, prediction)
        sample_test_accuracy = np.argmax(target, axis=1) == np.argmax(prediction, axis=1)
        sample_test_accuracy = np.mean(sample_test_accuracy)
        test_loss_aggregator.append(sample_test_loss.numpy())
        test_accuracy_aggregator.append(np.mean(sample_test_accuracy))

    test_loss = tf.reduce_mean(test_loss_aggregator)
    test_accuracy = tf.reduce_mean(test_accuracy_aggregator)

    return test_loss, test_accuracy

In [111]:
# Running the network
# Training
epochs = 10
learning_rate = 0.001

model = MyModel()
cross_entropy_loss = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.SGD(learning_rate)

# for visualization
train_losses = []
test_losses = []
test_accuracies = []

# test model before training
test_loss, test_accuracy = test(model, test_dataset, cross_entropy_loss)
test_losses.append(test_loss)
test_accuracies.append(test_accuracy)

train_loss, _ = test(model, train_dataset, cross_entropy_loss)
train_losses.append(train_loss)

for epoch in range(epochs):
    print(f'Epoch: {str(epoch)} starting with accuracy {test_accuracies[-1]}')

    # training 
    epoch_loss_agg = []
    for input1,input2,target in train_dataset:
        print(input.shape)
        #train_loss = train_step(model, input1, input2, target, cross_entropy_loss, optimizer)
        #epoch_loss_agg.append(train_loss)

    # track training loss
    train_losses.append(tf.reduce_mean(epoch_loss_agg))

    # track accuracy and test loss
    test_loss, test_accuracy = test(model, test_dataset, cross_entropy_loss)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)

#visualization(train_losses, test_losses, test_accuracies)

    

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 784), dtype=tf.float32, name=None), TensorSpec(shape=(None, 784), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.bool, name=None))>


2022-11-26 13:26:59.584221: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2022-11-26 13:26:59.584942: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2022-11-26 13:26:59.588724: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. I

OperatorNotAllowedInGraphError: Exception encountered when calling layer "my_model_9" "                 f"(type MyModel).

in user code:

    File "/var/folders/2q/hwnn9141093b7bkbnd4bm7tc0000gn/T/ipykernel_20812/1643765698.py", line 16, in call  *
        input3 = tf.keras.layers.ReLU(tf.math.add(input1, input2))
    File "/Users/leonackermann/miniforge3/envs/iannwtf/lib/python3.10/site-packages/keras/layers/activation/relu.py", line 79, in __init__  **
        if max_value is not None and max_value < 0.0:

    OperatorNotAllowedInGraphError: Using a symbolic `tf.Tensor` as a Python `bool` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.


Call arguments received by layer "my_model_9" "                 f"(type MyModel):
  • input1=tf.Tensor(shape=(32, 784), dtype=float32)
  • input2=tf.Tensor(shape=(32, 784), dtype=float32)

In [116]:
learning_rate = 0.001

model = MyModel()
cross_entropy_loss = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.SGD(learning_rate)

test_loss, test_accuracy = test(model, test_dataset, cross_entropy_loss)

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 784), dtype=tf.float32, name=None), TensorSpec(shape=(None, 784), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.bool, name=None))>
(32, 256)
(32, 256)


2022-11-26 13:30:39.190093: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-26 13:30:39.223688: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2022-11-26 13:30:39.225201: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instea

AxisError: axis 1 is out of bounds for array of dimension 1