In this notebook, we look at a simple model built on a custom Keras layer that computes anisotropic magnitudes of
the gradient. These anisotropic magnitude transformations are non-linear, and both their directions and their sizes are trainable.

# Description of Data Files

The dataset is from [http://yann.lecun.com/exdb/mnist/](http://yann.lecun.com/exdb/mnist/). A description of the file formats
is also given there.

## Image File Format
```
[offset] [type]          [description] 
0000     32 bit integer  magic number 
0004     32 bit integer  number of images 
0008     32 bit integer  number of rows 
0012     32 bit integer  number of columns 
0016     unsigned byte   pixel 
0017     unsigned byte   pixel 
........ 
xxxx     unsigned byte    pixel
```

## Label File Format
```
[offset] [type]          [description] 
0000     32 bit integer  magic number (MSB first) 
0004     32 bit integer  number of items 
0008     unsigned byte   label 
0009     unsigned byte   label 
........ 
xxxx     unsigned byte   label
The labels values are 0 to 9.
```

In [None]:
import struct # To unpack string literals of bytes to integers.
import numpy as np
import matplotlib.pyplot as plt
from my_src import my_layers
plt.rcParams['svg.fonttype'] = 'none' # Saves space when saving svg plots to file.

import tensorflow as tf
import keras

In [None]:
# Containers for the image arrays (X) and the label arrays (y); later cells fill in the 'train' and 'test' keys.
X, y = dict(), dict()

# Get the Training Data

In [None]:
# Load the training images.

with open('data/train-images.idx3-ubyte', 'rb') as f:
    _ = f.read(4) # Skip over the magic number.
    # Pull the three 4-byte header fields off the stream in file order and show the raw bytes.
    training_info = dict((name, f.read(4)) for name in ('n_images', 'n_rows', 'n_columns'))
    print(training_info)
    # Decode every field as a big-endian ('>') 32-bit integer.
    training_info = {key : struct.unpack('>i', training_info[key])[0] for key in training_info}
    print(training_info)

    # Whatever follows the header is pixel data, one unsigned byte per pixel.
    images = np.fromfile(f, dtype = np.uint8)
    images = images.reshape((training_info['n_images'],
                             training_info['n_rows'],
                             training_info['n_columns']))
# Convert to float32 and divide by 255.
X['train'] = images.astype(np.float32) / 255

In [None]:
# Load the training labels.

with open('data/train-labels.idx1-ubyte', 'rb') as f:
    _ = f.read(4) # Skip over the magic number.
    # The next four bytes hold the item count as a big-endian 32-bit integer.
    (training_info['n_labels'],) = struct.unpack('>i', f.read(4))
    print(training_info)

    # The remainder of the file is the labels, one unsigned byte each.
    labels = np.fromfile(f, dtype = np.uint8)
y['train'] = labels

In [None]:
# Show the distinct training label values together with how many times each one occurs.
np.unique(y['train'], return_counts = True)

# Get the Test Data

In [None]:
# Load the test images.

with open('data/t10k-images.idx3-ubyte', 'rb') as f:
    _ = f.read(4) # Skip over the magic number.
    # Pull the three 4-byte header fields off the stream in file order and show the raw bytes.
    testing_info = dict((name, f.read(4)) for name in ('n_images', 'n_rows', 'n_columns'))
    print(testing_info)
    # Decode every field as a big-endian ('>') 32-bit integer.
    testing_info = {key : struct.unpack('>i', testing_info[key])[0] for key in testing_info}
    print(testing_info)

    # Whatever follows the header is pixel data, one unsigned byte per pixel.
    images = np.fromfile(f, dtype = np.uint8)
    images = images.reshape((testing_info['n_images'],
                             testing_info['n_rows'],
                             testing_info['n_columns']))
# Convert to float32 and divide by 255.
X['test'] = images.astype(np.float32) / 255

In [None]:
# Get the test labels.

with open('data/t10k-labels.idx1-ubyte', 'rb') as f:
    _ = f.read(4) # Read the magic number.
    # Item count, stored as a big-endian 32-bit integer.
    # The key is spelled 'n_labels' so that it matches the one stored in training_info.
    testing_info['n_labels'] = struct.unpack('>i', f.read(4))[0]
    print(testing_info)

    # The remainder of the file is the labels, one unsigned byte each.
    labels = np.fromfile(f, dtype = 'uint8')
y['test'] = labels

# Anisotropic Layers

In [None]:
# Functions that construct the layers of the complete models.
# my_layers is reloaded first so that the module's code is re-executed before the models below are built.
import importlib
importlib.reload(my_layers)

def make_simple_model(n_directions, input_shape):
    """Build a small classifier on top of an `AnisotropicGrad2D` layer.

    The input gets a trailing channel axis and is passed through
    `my_layers.AnisotropicGrad2D`. Its output has its four non-batch axes
    reversed and is reshaped to `(n_directions, <product of the last two axes>)`,
    after which a single-unit dense layer reduces the last axis. The mean of the
    channel-expanded input over axes 1 and 2 is concatenated with these values
    and the flattened result feeds a 10-unit softmax layer.

    Parameters
    ----------
    n_directions : int
        Forwarded to `AnisotropicGrad2D`; also the leading dimension of the
        reshaped gradient features.
    input_shape : tuple
        Shape of one sample without the batch axis. It must be a tuple because
        `(1,)` is appended to it.

    Returns
    -------
    tf.keras.Model
        Uncompiled model whose output has 10 softmax units.
    """
    image_in = tf.keras.Input(shape = input_shape)

    # Append a trailing channel axis of size 1.
    add_channel = tf.keras.layers.Reshape(target_shape = input_shape + (1,), name = 'Form_Channel')
    channelled = add_channel(image_in)

    anisotropic = my_layers.AnisotropicGrad2D(n_directions, name = 'Anisotropic_Grads')
    features = anisotropic(channelled)

    # Reverse the order of the four non-batch axes, then merge the last two into a single axis.
    features = tf.keras.layers.Permute((4, 3, 2, 1))(features)
    merged_size = features.shape[-1] * features.shape[-2]
    features = tf.keras.layers.Reshape(target_shape = (n_directions, merged_size))(features)

    # A learned single-unit dense reduction is used here in place of a plain mean.
    reducer = tf.keras.layers.Dense(units = 1, kernel_regularizer = tf.keras.regularizers.l2())
    features = reducer(features)

    # The backend mean is wrapped in a Lambda layer so that it enters the functional graph as a Keras layer.
    mean_over_image = tf.keras.layers.Lambda(lambda t : tf.keras.backend.mean(t, axis = [1, 2]),
                                             name = 'Mean_Over_Image')
    image_mean = mean_over_image(channelled)
    image_mean = tf.keras.layers.Reshape(target_shape = (image_mean.shape[-1], 1))(image_mean)

    # Stack the image mean on top of the reduced gradient features and flatten.
    combined = tf.keras.layers.concatenate([image_mean, features], axis = -2)
    combined = tf.keras.layers.Reshape(target_shape = (combined.shape[1] * combined.shape[2],))(combined)

    predictions = tf.keras.layers.Dense(units = 10,
                                        activation = tf.nn.softmax,
                                        kernel_regularizer = tf.keras.regularizers.l2(),
                                        name = 'Predictions')
    class_probs = predictions(combined)
    return tf.keras.Model(inputs = image_in, outputs = class_probs)
    
    
    
def make_model(n_directions, n_filters, input_shape):
    """Build a convolutional classifier on top of an `AnisotropicGrad2D` layer.

    The input gets a trailing channel axis and is passed through
    `my_layers.AnisotropicGrad2D`. Its output is reshaped to
    `(input_shape[0] - 1, input_shape[1] - 1, n_directions)`, followed by a
    4x4 convolution, 4x4 max pooling, flattening and a 10-unit softmax layer.

    Parameters
    ----------
    n_directions : int
        Forwarded to `AnisotropicGrad2D`; also the size of the last axis after
        the 'Flatten_Grads' reshape.
    n_filters : int
        Number of filters of the convolution.
    input_shape : tuple
        Shape of one sample without the batch axis. It must be a tuple because
        `(1,)` is appended to it; its first two entries are used for the reshape.

    Returns
    -------
    tf.keras.Model
        Uncompiled model whose output has 10 softmax units.
    """
    image_in = tf.keras.Input(shape = input_shape)

    # Append a trailing channel axis of size 1.
    add_channel = tf.keras.layers.Reshape(target_shape = input_shape + (1,), name = 'Form_Channel')
    channelled = add_channel(image_in)

    anisotropic = my_layers.AnisotropicGrad2D(n_directions, name = 'Anisotropic_Grads')
    grads = anisotropic(channelled)

    # NOTE(review): this assumes the layer output has one fewer row and column than the input - confirm in my_layers.
    grid_shape = (input_shape[0] - 1, input_shape[1] - 1, n_directions)
    grads = tf.keras.layers.Reshape(target_shape = grid_shape, name = 'Flatten_Grads')(grads)

    conv = tf.keras.layers.Conv2D(filters = n_filters,
                                  kernel_size = 4,
                                  kernel_regularizer = tf.keras.regularizers.l2(),
                                  name = 'conv2d')
    pooled = tf.keras.layers.MaxPool2D(pool_size = 4)(conv(grads))

    # Flatten the three non-batch axes into one before the final dense layer.
    flat_size = pooled.shape[1] * pooled.shape[2] * pooled.shape[3]
    flat = tf.keras.layers.Reshape(target_shape = (flat_size,))(pooled)

    predictions = tf.keras.layers.Dense(units = 10,
                                        activation = tf.nn.softmax,
                                        kernel_regularizer = tf.keras.regularizers.l2(),
                                        name = 'Prediction')
    class_probs = predictions(flat)
    return tf.keras.Model(inputs = image_in, outputs = class_probs)

In [None]:
# Build a simple model with 5 directions to check that it constructs; the result is displayed but not kept.
make_simple_model(n_directions = 5, input_shape = X['train'].shape[1:])

In [None]:
# Train the simple model (16 directions) for five epochs with Adam on the integer class labels.
model = make_simple_model(input_shape = X['train'].shape[1:], n_directions = 16)
model.compile(
    loss = 'sparse_categorical_crossentropy',
    optimizer = tf.keras.optimizers.Adam(),
    metrics = ['accuracy'],
)
model.fit(x = X['train'], y = y['train'], epochs = 5)

In [None]:
# Evaluate on the held-out test set; the model was compiled with the 'accuracy' metric, so two values come back.
test_loss, test_acc = model.evaluate(X['test'], y['test'])

In [None]:
# Inspect the trained weights of the simple model.

# First two trainable weights: print each one and draw it as a line plot.
for w in model.trainable_weights[:2]:
    print(w)
    plt.plot(tf.keras.backend.eval(w))
    plt.title(w.name)
    plt.show()

# Third trainable weight, viewed as a square image. The side length is derived from the
# number of entries rather than hard-coded, so the cell also works for other square input sizes.
# NOTE(review): make_simple_model permutes the axes before flattening, so the displayed image
# may be transposed relative to the input orientation - confirm against my_layers.
grad_reduce_w = tf.keras.backend.eval(model.trainable_weights[2])
side = int(round(grad_reduce_w.size ** 0.5))
grad_reduce_img = grad_reduce_w.reshape(side, side) # Raises ValueError if the size is not a perfect square.
print(grad_reduce_img.shape)
plt.imshow(grad_reduce_img)
plt.title(model.trainable_weights[2].name)
plt.colorbar()
plt.show()

# Second-to-last trainable weight, shown as an image.
prediction_w = model.trainable_weights[-2]
print(prediction_w)
plt.imshow(tf.keras.backend.eval(prediction_w))
plt.title(prediction_w.name)
plt.colorbar()
plt.show()


In [None]:
# Smoke test: build an untrained convolutional model and run it on the first two training images.
model = make_model(n_directions = 5, n_filters = 16, input_shape = X['train'].shape[1:])
sample_probs = model(X['train'][:2]) # One forward pass, reused for both printouts.
print(sample_probs.shape)
print(tf.keras.backend.eval(sample_probs))

In [None]:
# Train the convolutional model (16 directions, 16 filters) with Adam, using the default number of epochs.
model = make_model(input_shape = X['train'].shape[1:], n_directions = 16, n_filters = 16)
model.compile(
    loss = 'sparse_categorical_crossentropy',
    optimizer = tf.keras.optimizers.Adam(),
    metrics = ['accuracy'],
)
model.fit(x = X['train'], y = y['train'])

In [None]:
# Evaluate on the held-out test set; the model was compiled with the 'accuracy' metric, so two values come back.
test_loss, test_acc = model.evaluate(X['test'], y['test'])

In [None]:
# Print the trainable weights of the anisotropic gradient layer and their current values.
# The layer is looked up by the name given to it in make_model, and the public
# `trainable_weights` property is used instead of the private `_trainable_weights` attribute.
for weight in model.get_layer('Anisotropic_Grads').trainable_weights:
    print(weight)
    print(tf.keras.backend.eval(weight))

In [None]:
# Reset the global state Keras has accumulated while building the models above.
tf.keras.backend.clear_session()