In [None]:
#| default_exp callbacks

# Callbacks

> Useful callbacks to use with the functional layers.

In [None]:
#| export
import matplotlib.pyplot as plt

import wandb

import tensorflow as tf
from tensorflow.keras.callbacks import Callback

from flayers.layers import *

In [None]:
#| hide
import wandb
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
from einops import repeat

## Gabor parameter logging

> Logging Gabor parameters into *wandb*

We found that sometimes, during training, an error would be raised regarding the matrix inverse computed during the calculation of the Gabor filters. Our first thought was that the covariance matrix (calculated with the parameters `sigma_i` and `sigma_j`) wasn't invertible, meaning that `sigma_i` or `sigma_j` had become non-positive, but a constraint on the variables did not fix the problem. To inspect it in more detail, we are going to log all the layer's weights to *wandb* during training to try and find the root of the problem.

> To avoid introducing dependencies that won't be used by many people, we can put the `import wandb` in the instantiation of the callback.

In [None]:
#| export

class GaborLayerLogger(Callback):
    """Logs the gabor parameters into wandb during training.

    Every weight of every `GaborLayer` found in `self.model.layers` is logged as a
    histogram under the key `"<layer.name>.<weight.name>"`. Weights containing
    NaN/Inf values do not abort training: the number of non-finite entries is logged
    under `"<layer.name>.<weight.name>.non_finite"` and the histogram is built from
    the remaining finite entries (if any).
    """

    def on_train_batch_end(self, 
                           batch, # Batch number.
                           logs=None, # Dictionary containing metrics and information of the training.
                           ):
        """Logs the gabor parameters after each batch (after each parameter update)."""
        for layer in self.model.layers:
            if not isinstance(layer, GaborLayer):
                continue
            for weight in layer.weights:
                key = f'{layer.name}.{weight.name}'
                values = tf.convert_to_tensor(weight)
                payload = {}
                # `wandb.Histogram` raises "autodetected range of [nan, nan] is not finite"
                # on NaN/Inf input, so non-finite entries are counted and filtered out first.
                if values.dtype.is_floating:
                    finite_mask = tf.math.is_finite(values)
                    n_non_finite = int(tf.math.count_nonzero(tf.logical_not(finite_mask)))
                    if n_non_finite:
                        payload[f'{key}.non_finite'] = n_non_finite
                        values = tf.boolean_mask(values, finite_mask)
                # Skip the histogram only when every entry was non-finite.
                if not payload or int(tf.size(values)) > 0:
                    payload[key] = wandb.Histogram(values.numpy())
                # One `wandb.log` call per weight, as before.
                wandb.log(payload)

In [None]:
#| export

class GaborLayerSeqLogger(Callback):
    """Logs the gabor parameters into wandb during training.

    Same idea as a per-layer weight logger, but the layers are read from
    `self.model.feature_extractor.layers`, so the model must expose a
    `feature_extractor` attribute holding the layers to inspect. Weights containing
    NaN/Inf values do not abort training: the number of non-finite entries is logged
    under `"<layer.name>.<weight.name>.non_finite"` and the histogram is built from
    the remaining finite entries (if any).
    """

    def on_train_batch_end(self, 
                           batch, # Batch number.
                           logs=None, # Dictionary containing metrics and information of the training.
                           ):
        """Logs the gabor parameters after each batch (after each parameter update)."""
        for layer in self.model.feature_extractor.layers:
            if not isinstance(layer, GaborLayer):
                continue
            for weight in layer.weights:
                key = f'{layer.name}.{weight.name}'
                values = tf.convert_to_tensor(weight)
                payload = {}
                # `wandb.Histogram` cannot bin NaN/Inf input, so non-finite entries are
                # counted and filtered out before the histogram is built.
                if values.dtype.is_floating:
                    finite_mask = tf.math.is_finite(values)
                    n_non_finite = int(tf.math.count_nonzero(tf.logical_not(finite_mask)))
                    if n_non_finite:
                        payload[f'{key}.non_finite'] = n_non_finite
                        values = tf.boolean_mask(values, finite_mask)
                # Skip the histogram only when every entry was non-finite.
                if not payload or int(tf.size(values)) > 0:
                    payload[key] = wandb.Histogram(values.numpy())
                # One `wandb.log` call per weight, as before.
                wandb.log(payload)

## Logging filters as images

We can, as well, define a `class` to log any layer we want (that implements a `show_filters` method):

In [None]:
#| exporti

def log_show_filters(layer, # Layer to log functional filters from.
                     prepend="", # Text to prepend to the layer name.
                     ):
    """
    Logs the filters of a functional layer into wandb, but only when the layer exposes a
    `show_filters` method. Objects without that method are ignored and nothing is logged.
    """
    if not hasattr(layer, "show_filters"):
        return
    # `show=False` is forwarded to the layer; presumably this leaves the drawing on the
    # current pyplot figure so it can be captured below (verify against `show_filters`).
    layer.show_filters(show=False)
    key = f'{prepend}{layer.name}'
    wandb.log({key: wandb.Image(plt)})
    # Close the pyplot figure once it has been handed to wandb.
    plt.close()

In [None]:
#| exporti

def log_show_filters_deep(layer, # Layer to go deeper.
                          ):
    """
    Logs a functional layer's filters and those of its direct attributes. The layer itself is
    logged first; then every attribute listed by `layer.__dir__()` is passed to
    `log_show_filters`, with `layer.name + "_"` prepended to the logged name. Attributes that
    cannot be read or logged are skipped.
    """
    log_show_filters(layer)
    for attr in layer.__dir__():
        try:
            inner = getattr(layer, attr)
            log_show_filters(inner, prepend=layer.name+"_")
        except Exception:
            # Best-effort scan: some attributes raise on access or are not loggable
            # (e.g. they have no `name`). `Exception` is caught instead of a bare
            # `except` so that KeyboardInterrupt/SystemExit still stop the training.
            continue

In [None]:
#| export

class FunctionalFilterLogger(Callback):
    """Logs the parametric filters of any layer implementing a `show_filters` method."""

    def _log_model_filters(self):
        """Runs `log_show_filters_deep` on every layer in `self.model.layers`."""
        for layer in self.model.layers:
            log_show_filters_deep(layer)

    def on_train_begin(self, 
                       logs=None, # Dictionary containing metrics and information of the training.
                       ):
        """Logs the parametric filters at the beginning of the training."""
        self._log_model_filters()

    def on_epoch_end(self, 
                     epoch, # Epoch number.
                     logs=None, # Dictionary containing metrics and information of the training.
                     ):
        """Logs the parametric filters after each epoch."""
        self._log_model_filters()

    def on_train_end(self, 
                       logs=None, # Dictionary containing metrics and information of the training.
                       ):
        """Logs the parametric filters at the end of the training."""
        self._log_model_filters()

Let's check if it logs the parameters appropriately:

In [None]:
#| hide
# Load the MNIST train/test splits shipped with Keras.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Append a trailing channel axis of size 1 ("b h w -> b h w c") and divide by 255.0.
X_train = repeat(X_train, "b h w ->  b h w c", c=1)/255.0
X_test = repeat(X_test, "b h w ->  b h w c", c=1)/255.0

# Display the resulting shapes as the cell output.
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

((60000, 28, 28, 1), (60000,), (10000, 28, 28, 1), (10000,))

## Definition of simple model

In [None]:
# Small classifier used to exercise the callbacks:
# RandomGabor -> max pooling -> global average pooling -> 10-unit softmax.
model = tf.keras.Sequential([
    RandomGabor(n_gabors=4, size=20, input_shape=(28,28,1)),
    layers.MaxPool2D(2),
    layers.GlobalAveragePooling2D(),
    layers.Dense(10, activation="softmax")
])
# Integer labels are used directly, hence the sparse categorical cross-entropy loss.
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
random_gabor_3 (RandomGabor) (None, 28, 28, 4)         1626      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 4)         0         
_________________________________________________________________
global_average_pooling2d_1 ( (None, 4)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                50        
Total params: 1,676
Trainable params: 76
Non-trainable params: 1,600
_________________________________________________________________


In [None]:
# Training hyperparameters; passed to `wandb.init` in the next cell.
config = {
    "epochs":5,
    "batch_size":64,
}

In [None]:
# Start a wandb run in the "Testing" project, recording the hyperparameters above.
wandb.init(project="Testing",
           config=config)
# NOTE: rebinds `config` from the plain dict to `wandb.config`, so later cells use
# attribute access (`config.epochs`); re-running this cell passes that object to `init`.
config = wandb.config




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: [32m[41mERROR[0m Control-C detected -- Run data was not synced


In [None]:
# Train with `GaborLayerLogger` attached so the Gabor weights are logged after every batch.
# NOTE: the recorded output below shows this run aborting with a ValueError once the loss became NaN.
model.fit(X_train, Y_train, epochs=config.epochs, batch_size=config.batch_size, callbacks=[GaborLayerLogger()])

Epoch 1/5
 33/938 [>.............................] - ETA: 47s - loss: nan - accuracy: 0.0923   

ValueError: autodetected range of [nan, nan] is not finite

In [None]:
# Close the wandb run opened above.
wandb.finish()




## Filter logging

> Logging the evolution of the filters might provide interesting insights into the behavior of the model.

In [None]:
#| export

class GaborFiltersLogger(Callback):
    """Logs the filters of every `GaborLayer` in the model into wandb during training.

    Every `batch_interval` batches, each `GaborLayer` in `self.model.layers` is asked to
    draw its filters with `show_filters(show=False)` and the pyplot state is logged under
    the key `"gabors"`.
    """

    def __init__(self,
                 batch_interval: int, # Batch interval for logging Gabor images.
                 ):
        # Let the Keras `Callback` base class initialise its own state first.
        super().__init__()
        # A zero interval would raise ZeroDivisionError on the first batch; fail early instead.
        if batch_interval < 1:
            raise ValueError(f"batch_interval must be a positive integer, got {batch_interval!r}")
        self.batch_interval = batch_interval

    def on_train_batch_end(self, 
                           batch, # Batch number.
                           logs=None, # Dictionary containing metrics and information of the training.
                           ):
        """Logs the gabor filters after each batch whose number is a multiple of `batch_interval`."""
        if batch % self.batch_interval != 0:
            return
        for layer in self.model.layers:
            if isinstance(layer, GaborLayer):
                # Presumably draws onto the current pyplot figure (verify against `show_filters`).
                layer.show_filters(show=False)
                wandb.log({"gabors": plt})
                # Close the figure so figures do not accumulate over the training run.
                plt.close()

In [None]:
# Fresh, untrained copy of the same RandomGabor classifier used earlier in the notebook.
model = tf.keras.Sequential([
    RandomGabor(n_gabors=4, size=20, input_shape=(28,28,1)),
    layers.MaxPool2D(2),
    layers.GlobalAveragePooling2D(),
    layers.Dense(10, activation="softmax")
])
# Integer labels are used directly, hence the sparse categorical cross-entropy loss.
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.summary()

2022-09-22 11:35:16.753100: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2373 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 780 Ti, pci bus id: 0000:02:00.0, compute capability: 3.5
2022-09-22 11:35:16.754263: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 153 MB memory:  -> device: 1, name: Tesla K40m, pci bus id: 0000:03:00.0, compute capability: 3.5
2022-09-22 11:35:16.755522: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 5435 MB memory:  -> device: 2, name: NVIDIA GeForce GTX TITAN Black, pci bus id: 0000:83:00.0, compute capability: 3.5
2022-09-22 11:35:16.756757: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 5435 MB memory:  -> device: 3, name: NVIDIA GeForce GTX TITAN Black, 

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
random_gabor_1 (RandomGabor) (None, 28, 28, 4)         1626      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 4)         0         
_________________________________________________________________
global_average_pooling2d (Gl (None, 4)                 0         
_________________________________________________________________
dense (Dense)                (None, 10)                50        
Total params: 1,676
Trainable params: 76
Non-trainable params: 1,600
_________________________________________________________________


In [None]:
# Reset the hyperparameters to a plain dict before starting a new wandb run.
config = {
    "epochs":5,
    "batch_size":64,
}

In [None]:
# Start a new wandb run in the "Testing" project for the filter-logging experiment.
wandb.init(project="Testing",
           config=config)
# NOTE: rebinds `config` from the plain dict to `wandb.config` (attribute access from here on).
config = wandb.config

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjorgvt[0m (use `wandb login --relogin` to force relogin)


In [None]:
# `GaborFiltersLogger` requires `batch_interval`; log the filters once every 100 batches.
model.fit(X_train, Y_train, epochs=config.epochs, batch_size=config.batch_size, callbacks=[GaborFiltersLogger(batch_interval=100)])

In [None]:
# Close the wandb run opened for the filter-logging experiment.
wandb.finish()




VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

## Catching errors

> Let's try to find the error we are getting.

In [None]:
# Standalone RandomGabor layer, built with the same arguments as in the models above.
# NOTE(review): `a` is not referenced by any later cell visible here — confirm it is still needed.
a = RandomGabor(n_gabors=4, size=20, input_shape=(28,28,1))

2022-09-22 11:41:09.334335: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2373 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 780 Ti, pci bus id: 0000:02:00.0, compute capability: 3.5
2022-09-22 11:41:09.335383: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 153 MB memory:  -> device: 1, name: Tesla K40m, pci bus id: 0000:03:00.0, compute capability: 3.5
2022-09-22 11:41:09.336505: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 5435 MB memory:  -> device: 2, name: NVIDIA GeForce GTX TITAN Black, pci bus id: 0000:83:00.0, compute capability: 3.5
2022-09-22 11:41:09.338080: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 5435 MB memory:  -> device: 3, name: NVIDIA GeForce GTX TITAN Black, 

In [None]:
#| export

class GaborErrorPrinter(Callback):
    """Prints the parameters of the Gabor layer when an error is going to happen."""

    def on_train_batch_end(self, 
                           batch, # Batch number.
                           logs=None, # Dictionary containing metrics and information of the training.
                           ):
        """
        Rebuilds the filters of every `GaborLayer` after each batch (after each parameter update).
        If the rebuild raises, the error and the layer's public attributes are printed and
        training is asked to stop.
        """
        for layer in self.model.layers:
            if not isinstance(layer, GaborLayer):
                continue
            try:
                # Only the side effect matters: the call raises when the current parameters
                # cannot produce valid filters. The result itself is discarded.
                create_multiple_different_rot_gabor_tf(n_gabors=layer.n_gabors, Nrows=layer.Nrows, Ncols=layer.Ncols, imean=layer.imean, jmean=layer.jmean, sigma_i=layer.sigma_i, sigma_j=layer.sigma_j,
                                                       freq=layer.freq, theta=layer.theta, rot_theta=layer.rot_theta, sigma_theta=layer.sigma_theta, fs=layer.fs, normalize=layer.normalize)
            except Exception as err:
                # `Exception` rather than a bare `except`, so Ctrl-C is not reported as a Gabor failure.
                print("ERROR IN THE CALCULATION OF THE GABOR FILTERS!!")
                print(f"{type(err).__name__}: {err}")
                print("STOPPING TRAINING")
                self.model.stop_training = True
                # Dump every public (non-underscore) instance attribute of the failing layer.
                for name, value in layer.__dict__.items():
                    if not name.startswith("_"):
                        print(f"{name}: {value}")

In [None]:
# Fresh, untrained copy of the same RandomGabor classifier, used for the error-catching run.
model = tf.keras.Sequential([
    RandomGabor(n_gabors=4, size=20, input_shape=(28,28,1)),
    layers.MaxPool2D(2),
    layers.GlobalAveragePooling2D(),
    layers.Dense(10, activation="softmax")
])
# Integer labels are used directly, hence the sparse categorical cross-entropy loss.
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
random_gabor_7 (RandomGabor) (None, 28, 28, 4)         1626      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 14, 14, 4)         0         
_________________________________________________________________
global_average_pooling2d_2 ( (None, 4)                 0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                50        
Total params: 1,676
Trainable params: 76
Non-trainable params: 1,600
_________________________________________________________________


In [None]:
# Train with `GaborErrorPrinter` attached; epochs and batch size are hard-coded here
# (100 and 32) instead of being read from `config`.
model.fit(X_train, Y_train, epochs=100, batch_size=32, callbacks=[GaborErrorPrinter()])

Epoch 1/100
Epoch 2/100
 373/1875 [====>.........................] - ETA: 1:21 - loss: 2.1424 - accuracy: 0.2391

KeyboardInterrupt: 

## Logging batches seen

> When training models with different `batch_size`, it's important to compare them based on the times the parameters were updated, rather than the number of epochs.

In [None]:
#| export

class BatchesSeenLogger(Callback):
    """Keeps a running count of the training batches processed and reports it to wandb."""

    def __init__(self):
        super().__init__()
        # Running total; it is only ever incremented, never reset by this class.
        self.batches_seen = 0

    def on_train_batch_end(self, 
                           batch, # Batch number.
                           logs=None, # Dictionary containing metrics and information of the training.
                           ):
        """Adds the batch that just finished to the running total."""
        self.batches_seen = self.batches_seen + 1

    def on_epoch_end(self,
                     epoch, # Epoch number.
                     logs=None, # Dictionary containing metrics and information of the training.
                     ):
        """Sends the running total of batches to wandb under the key `"Batches"`."""
        payload = {"Batches": self.batches_seen}
        wandb.log(payload)