###### TODO

1. Ensure Residual block skip connection is working correctly
2. Ensure the shapes of the inputs and outputs of the conv and residual blocks are what is expected

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.activations import relu
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

In [8]:
# Check the TensorFlow version and whether a GPU is visible.

print(tf.__version__)
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)

# Memory growth is needed for training to occur on an RTX 2080 Super as of (2021-01-11).
# Enable it on every visible GPU; on a CPU-only machine the loop simply does nothing.
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth can only be set before the GPUs have been initialised
    # (e.g. this cell was re-run after a model was already built).
    print(err)

2.4.1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


<b>Load the data and preprocess it.</b> As MNIST consists of greyscale images with a single channel, we simply convert them to binary so that they are easier to work with.

In [5]:
# Load MNIST as (image, label) pairs. The first requested split feeds training and
# the second feeds validation, so they must be different splits ('train' and 'test').
(ds_train, ds_test), ds_info = tfds.load('mnist', split=['train', 'test'], shuffle_files=True,
                                         as_supervised=True, with_info=True)

def to_binary(img, label):
    """Binarise an image and return it as both model input and target.

    The pixel values are cast to float32, scaled by 1/255 and rounded, so
    every pixel ends up as 0.0 or 1.0. The ``label`` argument is ignored:
    the returned target is the binarised image itself, cast to int32.

    Returns:
        A ``(binary_image, target)`` tuple, float32 and int32 respectively.
    """
    scaled = tf.cast(img, tf.float32) / 255.0
    binary = tf.math.round(scaled)
    target = tf.cast(binary, tf.int32)
    return binary, target

# Training pipeline: binarise, cache the binarised images, then shuffle with a
# buffer sized to the number of examples ds_info reports for the 'train' split,
# and finally group into batches of 64.
ds_train = (
    ds_train
    .map(to_binary)
    .cache()
    .shuffle(ds_info.splits['train'].num_examples)
    .batch(64)
)

# Evaluation pipeline: binarise, batch, cache the batches and prefetch them.
ds_test = ds_test.map(to_binary)
ds_test = ds_test.batch(64)
ds_test = ds_test.cache()
ds_test = ds_test.prefetch(64)

### Custom Layers

The PixelCNN will require 2 blocks of layers.

1. The Masked 2D Convolution: This is used to ensure that the kernel does not have information on the value it is predicting. This is done by setting parts of the kernel to zero (masking). There are two types of mask, Type A and Type B. The Type A mask is where the center pixel of the kernel is also masked; in later layers (Type B) it is not. The reasoning behind this can be found in the paper linked in the readme.


2. The Residual Blocks: These layers allow the network to optimize the residuals of the input features, making the network more efficient.


The custom layers will be built by subclassing the Layer class in <i>tensorflow.keras.layers.Layer</i>

In [6]:
class MaskedConv2D(layers.Layer):
    """2D convolution whose kernel is masked so it cannot see "future" pixels.

    Pixels are generated in raster order (left to right, top to bottom), so the
    output at a position may only depend on the rows above it and on the pixels
    to its left in the same row. This is enforced by multiplying the kernel
    with a fixed binary mask before every convolution.

    Args:
        mask_type: 'A' additionally masks the centre tap of the kernel (the
            pixel being predicted). Any other value leaves the centre tap
            visible (type 'B' behaviour).
        kernel: side length of the square kernel. An odd value is assumed so
            that the kernel has a well-defined centre tap.
        filters: number of output channels.
    """

    def __init__(self, mask_type, kernel=5, filters=1):
        super(MaskedConv2D, self).__init__()
        self.kernel = kernel
        self.filters = filters
        self.mask_type = mask_type

    def build(self, input_shape):
        """Create the kernel, the bias and the constant mask."""
        self.w = self.add_weight(shape=[self.kernel, self.kernel, input_shape[-1], self.filters],
                                 initializer='glorot_normal',
                                 trainable=True)

        self.b = self.add_weight(shape=(self.filters,), initializer='zeros', trainable=True)

        # Build the mask on the flattened (row-major) kernel: every tap that
        # comes after the centre lies either to the right of the centre in the
        # same row or in a row below it, i.e. it is a pixel that has not been
        # generated yet and must be hidden.
        mask = np.ones(self.kernel**2, dtype=np.float32)
        center = len(mask) // 2
        mask[center + 1:] = 0
        if self.mask_type == 'A':
            # Type A also hides the pixel that is currently being predicted.
            mask[center] = 0

        # Shape (k, k, 1, 1) broadcasts over the input and output channel axes.
        mask = mask.reshape((self.kernel, self.kernel, 1, 1))

        self.mask = tf.constant(mask, dtype=tf.float32)

    def call(self, inputs):
        """Apply the masked convolution (stride 1, SAME padding), bias and ReLU."""
        masked_w = tf.math.multiply(self.w, self.mask)
        output = tf.nn.conv2d(inputs, masked_w, 1, "SAME") + self.b

        return tf.nn.relu(output)
    
class ResidualBlock(layers.Layer):
    """Residual block made of three type-'B' masked convolutions.

    The branch is a 1x1 convolution with ``h`` filters, a 3x3 convolution
    with ``h`` filters and a 1x1 convolution with ``2*h`` filters. Its output
    is added to the block input, so the input is expected to carry ``2*h``
    channels for the skip connection to line up.

    Args:
        h: number of filters in the first two convolutions; the last
            convolution produces ``2*h`` channels.
    """

    def __init__(self, h=32):
        super().__init__()

        reduce_conv = MaskedConv2D('B', kernel=1, filters=h)
        spatial_conv = MaskedConv2D('B', kernel=3, filters=h)
        expand_conv = MaskedConv2D('B', kernel=1, filters=2*h)
        self.forward = Sequential([reduce_conv, spatial_conv, expand_conv])

    def call(self, inputs):
        """Return the block input plus the output of the convolutional branch."""
        residual = self.forward(inputs)
        return inputs + residual


def buildPixelCNN(hidden_features=64, output_features=64, num_resblocks=7):
    """Assemble the PixelCNN as a functional Keras model for 28x28x1 inputs.

    Architecture, in order:
      * a 7x7 type-'A' masked convolution with ``2*hidden_features`` filters,
      * ``num_resblocks`` residual blocks built with ``hidden_features``,
      * a 1x1 convolution with ``output_features`` filters and ReLU,
      * a 1x1 convolution with a single filter and a sigmoid activation.

    Returns:
        A ``tf.keras.Model`` named "pixelcnn" whose output has one
        sigmoid-activated channel per pixel.
    """
    image_in = layers.Input(shape=(28, 28, 1))

    # The first layer uses mask type 'A'.
    features = MaskedConv2D('A', kernel=7, filters=2*hidden_features)(image_in)

    for _block in range(num_resblocks):
        block = ResidualBlock(hidden_features)
        features = block(features)

    # Two 1x1 convolutions: a ReLU projection followed by the sigmoid output layer.
    projection = layers.Conv2D(output_features, (1, 1), padding='same', activation='relu')
    pixel_head = layers.Conv2D(1, (1, 1), padding='same', activation='sigmoid')
    pixel_probs = pixel_head(projection(features))

    return tf.keras.Model(inputs=image_in, outputs=pixel_probs, name="pixelcnn")

# Build the model with the default hyper-parameters and print a per-layer
# summary (output shapes and parameter counts) as a sanity check.
pixelcnn = buildPixelCNN()
pixelcnn.summary()

Model: "pixelcnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
masked_conv2d (MaskedConv2D) (None, 28, 28, 128)       6400      
_________________________________________________________________
residual_block (ResidualBloc (None, 28, 28, 128)       53504     
_________________________________________________________________
residual_block_1 (ResidualBl (None, 28, 28, 128)       53504     
_________________________________________________________________
residual_block_2 (ResidualBl (None, 28, 28, 128)       53504     
_________________________________________________________________
residual_block_3 (ResidualBl (None, 28, 28, 128)       53504     
_________________________________________________________________
residual_block_4 (ResidualBl (None, 28, 28, 128)       535

<b>Training the model</b>

In [7]:
# Every pixel is a binary target, so the model is trained with binary
# cross-entropy. The metric is given as a Metric object (the metrics list is
# meant for metrics, not Loss instances).
pixelcnn.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                 optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
                 metrics=[tf.keras.metrics.BinaryCrossentropy()])

# Keep the History object so the loss curves can be inspected after training.
history = pixelcnn.fit(ds_train, epochs=50, validation_data=ds_test)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50


Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1e3c1d6a9d0>

Use the trained model to generate digits

In [None]:
# Generate a grid of digits by sampling one pixel at a time in raster order.
grid_row = 5
grid_col = 5
batch = grid_row * grid_col
h = w = 28

rng = np.random.default_rng()

# Start from blank canvases; each pixel is filled in as soon as it is sampled.
images = np.zeros((batch, h, w, 1), dtype=np.float32)

for row in range(h):
    for col in range(w):
        # The model returns one probability per pixel; only the pixel that is
        # currently being generated is read from the prediction.
        prob = pixelcnn.predict(images)[:, row, col, 0]
        # Draw a Bernoulli sample for that pixel and write it back so that
        # later predictions are conditioned on it.
        images[:, row, col, 0] = (rng.random(batch) < prob).astype(np.float32)

# Show the generated samples in a grid_row x grid_col grid.
fig, axes = plt.subplots(grid_row, grid_col, figsize=(grid_col, grid_row), squeeze=False)
for ax, img in zip(axes.flat, images):
    ax.imshow(img[:, :, 0], cmap='gray')
    ax.axis('off')
plt.show()


In [None]:
# To generate images with this model we will be "predicting" the value of each pixel one by one.
# 