In [1]:
#%env CUDA_DEVICE_ORDER=PCI_BUS_ID
#%env CUDA_VISIBLE_DEVICES=0

In [2]:
%load_ext autoreload
%autoreload 2

# Imports

In [3]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt

from matplotlib import animation
from IPython.display import HTML

import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_probability as tfp

# Data Loading

In [4]:
data = np.load("../../data/mnist_test_seq.npy")
data.shape

(20, 10000, 64, 64)

# Data reshaping

In [5]:
# We can see that data is of shape (window, n_samples, width, height)
# But we want for keras something of shape (n_samples, window, width, height)
data = np.moveaxis(data, 0, 1)
# Also expand dimensions to have channels at the end (n_samples, window, width, height, channels)
data = np.expand_dims(data, axis=-1)
data.shape

(10000, 20, 64, 64, 1)

# See the frame in action:

In [6]:
def display_videos(data, n_rows=3, n_cols=3):
    fig, axs = plt.subplots(nrows=n_rows, ncols=n_cols, squeeze=False)
    ims = []

        
    for i in range(n_rows):
        for j in range(n_cols):
            idx = i*n_rows + j
            video = data[idx]
            im = axs[i][j].imshow(video[0,:,:,:], animated=True)
            ims.append(im)

            plt.close() # this is required to not display the generated image

    def init():
        for i in range(n_rows):
            for j in range(n_cols):
                idx = i*n_rows + j
                video = data[idx]
                im = ims[idx]
                im.set_data(video[0,:,:,:])

    def animate(frame_id):
        for i in range(n_rows):
            for j in range(n_cols):
                idx = i*n_rows + j
                video = data[idx]
                ims[idx].set_data(video[frame_id,:,:,:])
        return ims

    anim = animation.FuncAnimation(fig, animate, 
                                   init_func=init, 
                                   frames=data.shape[1],
                                   blit=True,
                                   interval=100)
    return HTML(anim.to_html5_video())
    

In [7]:
display_videos(data[:10], n_rows=1, n_cols=5)

<IPython.core.display.Javascript object>

# Create dataset object

In [8]:
def _preprocess(sample):
    image = tf.cast(sample, tf.float32) / 255.  # Scale to unit interval.
    image = image < tf.random.uniform(tf.shape(image))   # Randomly binarize.
    return image, image

train_dataset = (tf.data.Dataset.from_tensor_slices(data[:9000])
                 .map(_preprocess)
                 .batch(256)
                 .prefetch(tf.data.AUTOTUNE)
                 .shuffle(int(10e3)))
test_dataset = (tf.data.Dataset.from_tensor_slices(data[9000:])
                 .map(_preprocess)
                 .batch(256)
                 .prefetch(tf.data.AUTOTUNE)
                 .shuffle(int(10e3)))

2022-04-07 05:14:16.517074: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-04-07 05:14:19.034009: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 12947 MB memory:  -> device: 0, name: NVIDIA RTX A4000, pci bus id: 0000:01:00.0, compute capability: 8.6
2022-04-07 05:14:19.035334: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 14256 MB memory:  -> device: 1, name: NVIDIA RTX A4000, pci bus id: 0000:25:00.0, compute capability: 8.6
2022-04-07 05:14:19.036344: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:2 w

# Specify model

In [9]:
strategy = tf.distribute.MirroredStrategy()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3', '/job:localhost/replica:0/task:0/device:GPU:4', '/job:localhost/replica:0/task:0/device:GPU:5', '/job:localhost/replica:0/task:0/device:GPU:6', '/job:localhost/replica:0/task:0/device:GPU:7')


In [18]:
input_shape = data.shape[1:]
encoded_size = 32
base_depth = 32

In [19]:
prior = tfp.distributions.Independent(tfp.distributions.Normal(loc=tf.zeros(encoded_size), scale=1),
                        reinterpreted_batch_ndims=1)

encoder = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=input_shape),
    tf.keras.layers.Lambda(lambda x: tf.cast(x, tf.float32) - 0.5),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(base_depth, 5, strides=1,
                padding='same', activation=tf.nn.leaky_relu)),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(base_depth, 5, strides=2,
                padding='same', activation=tf.nn.leaky_relu)),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(2 * base_depth, 5, strides=1,
                padding='same', activation=tf.nn.leaky_relu)),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(2 * base_depth, 5, strides=2,
                padding='same', activation=tf.nn.leaky_relu)),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(4 * encoded_size, 7, strides=1,
                padding='valid', activation=tf.nn.leaky_relu)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(tfp.layers.MultivariateNormalTriL.params_size(encoded_size),
               activation=None),
    tfp.layers.MultivariateNormalTriL(
        encoded_size,
        activity_regularizer=tfp.layers.KLDivergenceRegularizer(prior)),
])

In [20]:
with strategy.scope():
    encoder = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=input_shape),
        tf.keras.layers.Lambda(lambda x: tf.cast(x, tf.float32) - 0.5),
        tf.keras.layers.Conv3D(base_depth, 5, strides=1,
                    padding='same', activation=tf.nn.leaky_relu),
        tf.keras.layers.Conv3D(base_depth, 5, strides=2,
                    padding='same', activation=tf.nn.leaky_relu),
        tf.keras.layers.Conv3D(2 * base_depth, 5, strides=1,
                    padding='same', activation=tf.nn.leaky_relu),
        tf.keras.layers.Conv3D(2 * base_depth, 5, strides=2,
                    padding='same', activation=tf.nn.leaky_relu),
        #tf.keras.layers.Conv3D(4 * encoded_size, 7, strides=1,
        #            padding='valid', activation=tf.nn.leaky_relu),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(tfp.layers.MultivariateNormalTriL.params_size(encoded_size),
                   activation=None),
        tfp.layers.MultivariateNormalTriL(
            encoded_size,
            activity_regularizer=tfp.layers.KLDivergenceRegularizer(prior)),
    ])

In [21]:
encoder.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lambda_1 (Lambda)           (None, 20, 64, 64, 1)     0         
                                                                 
 conv3d_4 (Conv3D)           (None, 20, 64, 64, 32)    4032      
                                                                 
 conv3d_5 (Conv3D)           (None, 10, 32, 32, 32)    128032    
                                                                 
 conv3d_6 (Conv3D)           (None, 10, 32, 32, 64)    256064    
                                                                 
 conv3d_7 (Conv3D)           (None, 5, 16, 16, 64)     512064    
                                                                 
 flatten_2 (Flatten)         (None, 81920)             0         
                                                                 
 dense_1 (Dense)             (None, 560)              

In [22]:
with strategy.scope():
    decoder = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=[encoded_size]),
        tf.keras.layers.Reshape([1, 1, 1, encoded_size]),
        tf.keras.layers.Conv3DTranspose(2 * base_depth, (5, 4, 4), strides=1,
                             padding='valid', activation=tf.nn.leaky_relu),
        tf.keras.layers.Conv3DTranspose(2 * base_depth, (5, 4, 4), strides=(1, 2, 2),
                             padding='same', activation=tf.nn.leaky_relu),
        tf.keras.layers.Conv3DTranspose(2 * base_depth, (5, 4, 4), strides=2,
                             padding='same', activation=tf.nn.leaky_relu),
        tf.keras.layers.Conv3DTranspose(base_depth, (5, 4, 4), strides=(1, 2, 2),
                             padding='same', activation=tf.nn.leaky_relu),
        tf.keras.layers.Conv3DTranspose(base_depth, (5, 4, 4), strides=2,
                             padding='same', activation=tf.nn.leaky_relu),
        tf.keras.layers.Conv3DTranspose(base_depth, (5, 4, 4), strides=1,
                             padding='same', activation=tf.nn.leaky_relu),
        tf.keras.layers.Conv2D(filters=1, kernel_size=5, strides=1,
                    padding='same', activation=None),
        tf.keras.layers.Flatten(),
        tfp.layers.IndependentBernoulli(input_shape, tfp.distributions.Bernoulli.logits),
    ])

In [23]:
decoder.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_1 (Reshape)         (None, 1, 1, 1, 32)       0         
                                                                 
 conv3d_transpose_6 (Conv3DT  (None, 5, 4, 4, 64)      163904    
 ranspose)                                                       
                                                                 
 conv3d_transpose_7 (Conv3DT  (None, 5, 8, 8, 64)      327744    
 ranspose)                                                       
                                                                 
 conv3d_transpose_8 (Conv3DT  (None, 10, 16, 16, 64)   327744    
 ranspose)                                                       
                                                                 
 conv3d_transpose_9 (Conv3DT  (None, 10, 32, 32, 32)   163872    
 ranspose)                                            

In [24]:

with strategy.scope():
    vae = tf.keras.Model(inputs=encoder.inputs,
                    outputs=decoder(encoder.outputs[0]))

In [25]:
negloglik = lambda x, rv_x: -rv_x.log_prob(x)

vae.compile(optimizer=tf.optimizers.Adam(learning_rate=1e-3),
            loss=negloglik)

callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)

_ = vae.fit(train_dataset,
            epochs=100,
            validation_data=test_dataset,
            callbacks=[callback]
           )

2022-04-07 05:16:23.557981: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_UINT8
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 9000
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:0"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: 20
        }
        dim {
          size: 64
        }
        dim {
          size: 64
        }
        dim {
          size: 1
        }
      }
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        t

Epoch 1/100
INFO:tensorflow:batch_all_reduce: 24 all-reduces with algorithm = nccl, num_packs = 1
INFO:tensorflow:batch_all_reduce: 24 all-reduces with algorithm = nccl, num_packs = 1


2022-04-07 05:16:50.009230: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 30 of 10000
2022-04-07 05:16:51.494975: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:415] Shuffle buffer filled.




2022-04-07 05:17:11.028863: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_UINT8
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 1000
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:5"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: 20
        }
        dim {
          size: 64
        }
        dim {
          size: 64
        }
        dim {
          size: 1
        }
      }
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        t

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


2022-04-07 05:23:46.050424: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 32 of 10000
2022-04-07 05:23:46.520046: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:415] Shuffle buffer filled.


Epoch 16/100
Epoch 17/100
Epoch 18/100


2022-04-07 05:25:15.233295: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 35 of 10000
2022-04-07 05:25:15.288476: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:415] Shuffle buffer filled.


Epoch 19/100
Epoch 20/100


# Results

In [26]:
# We'll just examine ten random digits.
x = next(iter(test_dataset))[0][:5]
xhat = vae(x)
assert isinstance(xhat, tfp.distributions.Distribution)

In [27]:
print('Originals:')
display_videos(x, n_rows=1, n_cols=5)

Originals:


<IPython.core.display.Javascript object>

In [28]:
print('Decoded Random Samples:')
display_videos(xhat.sample(), n_rows=1, n_cols=5)

Decoded Random Samples:


<IPython.core.display.Javascript object>

In [29]:
print('Decoded Modes:')
display_videos(xhat.mode(), n_rows=1, n_cols=5)

Decoded Modes:


<IPython.core.display.Javascript object>

In [30]:
print('Decoded Means:')
display_videos(xhat.mean(), n_rows=1, n_cols=5)

Decoded Means:


<IPython.core.display.Javascript object>

In [31]:
print('Decoded variance:')
display_videos(xhat.variance(), n_rows=1, n_cols=5)

Decoded variance:


<IPython.core.display.Javascript object>

In [32]:
# Now, let's generate ten never-before-seen digits.
z = prior.sample(10)
xtilde = decoder(z)
assert isinstance(xtilde, tfp.distributions.Distribution)

In [33]:
print('Randomly Generated Samples:')
display_videos(xhat.sample(), n_rows=1, n_cols=5)

Randomly Generated Samples:


<IPython.core.display.Javascript object>

In [34]:
print('Randomly Generated Modes:')
display_videos(xhat.mode(), n_rows=1, n_cols=5)

Randomly Generated Modes:


<IPython.core.display.Javascript object>

In [35]:
print('Randomly Generated Means:')
display_videos(xhat.mean(), n_rows=1, n_cols=5)

Randomly Generated Means:


<IPython.core.display.Javascript object>

In [36]:
print('Randomly Generated variance:')
display_videos(xhat.variance(), n_rows=1, n_cols=5)

Randomly Generated variance:


<IPython.core.display.Javascript object>

In [None]:
encoder.save("mnist_encoder")



AttributeError: in user code:

    File "/home/abdalla/GANime/venv/lib/python3.8/site-packages/tensorflow_probability/python/layers/distribution_layer.py", line 1251, in __call__  *
        return self._kl_divergence_fn(distribution_a)
    File "/home/abdalla/GANime/venv/lib/python3.8/site-packages/tensorflow_probability/python/layers/distribution_layer.py", line 1370, in _fn  **
        kl = kl_divergence_fn(distribution_a, distribution_b_)
    File "/home/abdalla/GANime/venv/lib/python3.8/site-packages/tensorflow_probability/python/layers/distribution_layer.py", line 1354, in kl_divergence_fn
        distribution_a.log_prob(z) - distribution_b.log_prob(z),

    AttributeError: 'Tensor' object has no attribute 'log_prob'
