<a href="https://colab.research.google.com/github/JiahuiKChen/3D-Object-Reconstruction/blob/master/Voxel_AE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Setup :)
import numpy as np
from keras.models import Model, Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers import Flatten, Conv3D, Dense, Conv1D, Input, Reshape, Conv3DTranspose
from keras.engine.input_layer import Input
from keras.losses import logcosh
from keras.regularizers import l2
from keras import backend as K

# Uses Keras functional API: https://keras.io/getting-started/functional-api-guide/

In [0]:
################### MODIFIED BINARY CROSS ENTROPY LOSS FUNCTION ############
# Binary cross-entropy with a weighting (lambda) term that tolerates false
# positives and penalizes false negatives. Because most voxels are blank,
# without this weighting the model would simply output empty voxels.
def lambda_binary_crossentropy(y_true, y_pred):
  """Weighted binary cross-entropy that penalizes false negatives heavily.

  The "filled voxel" term is weighted 0.97 and the "empty voxel" term 0.03,
  so missing a filled voxel costs far more than predicting a spurious one.

  Args:
    y_true: tensor of target occupancies.
    y_pred: tensor of predicted occupancy probabilities, same shape as y_true.

  Returns:
    Scalar tensor: the element-wise loss summed over axis 1, then averaged
    over all remaining axes.
  """
  # The 0.1 floor bounds the positive-term penalty at -log(0.1).
  # The ceiling must stay strictly below 1: with a ceiling of exactly 1 a
  # saturated prediction yields log(1 - 1) = -inf, and for filled voxels
  # (1 - y_true) * -inf = 0 * -inf = NaN, which poisons the whole loss.
  output = K.clip(y_pred, 0.1, 1 - K.epsilon())
  binary_entr = -0.97 * y_true * K.log(output) - (0.03) * (1-y_true) * K.log(1-output)

  # Reduce the per-element loss tensor to a single scalar
  loss = K.sum(binary_entr, axis=1)
  scalar_loss = K.mean(loss)

  return scalar_loss

In [0]:
########################## ENCODER NETWORK #################################
# Four Conv3D + BatchNormalization stages, then Flatten -> BatchNormalization
# -> Dense(100) latent layer.
# Spatial size per stage: 32 -> 30 -> 15 -> 13 -> 7. The flattened 7x7x7x64
# feature map (21952 values) feeds the latent layer.

# Open question (???): Glorot initialization is the Keras default
# (glorot_uniform). Keras also offers glorot_normal, and the paper is unclear
# about which one should be used.


def _encoder_stage(tensor, filters, padding, **conv_kwargs):
  """Apply one ELU Conv3D (kernel 3, L2 weight 0.01) followed by BatchNormalization.

  Extra keyword arguments (e.g. strides, data_format) are forwarded to Conv3D.
  Returns the (conv_output, batchnorm_output) pair.
  """
  conv_out = Conv3D(filters, kernel_size=3, activation='elu', padding=padding,
                    kernel_regularizer=l2(l=0.01), **conv_kwargs)(tensor)
  return conv_out, BatchNormalization()(conv_out)


# Network input: a 32x32x32 voxel grid with a single channel
voxel_input = Input(shape=(32, 32, 32, 1))

# Stage 1: 8 filters, 'valid' padding -> 30x30x30
encode_c1, encode_b1 = _encoder_stage(voxel_input, 8, 'valid',
                                      data_format='channels_last')

# Stage 2: 16 filters, stride 2 downsampling -> 15x15x15
encode_c2, encode_b2 = _encoder_stage(encode_b1, 16, 'same', strides=(2, 2, 2))

# Stage 3: 32 filters, 'valid' padding -> 13x13x13
encode_c3, encode_b3 = _encoder_stage(encode_b2, 32, 'valid')

# Stage 4: 64 filters, stride 2 downsampling -> 7x7x7
encode_c4, encode_b4 = _encoder_stage(encode_b3, 64, 'same', strides=(2, 2, 2))

# Stage 5: flatten the 7x7x7x64 feature map and batch-normalize it
encode_f5 = Flatten()(encode_b4)
encode_b5 = BatchNormalization()(encode_f5)

# Stage 6 - LATENT LAYER: fully connected, outputs 100 values
latent = Dense(100, activation='elu', use_bias=True,
               kernel_regularizer=l2(l=0.01))(encode_b5)

encoder = Model(inputs=voxel_input, outputs=latent)

In [37]:
# Compile the encoder on its own and print its layer structure
encoder.compile(loss='logcosh', optimizer='adam', metrics=['accuracy'])
encoder.summary()

# Smoke-test the encoder on random data: 100 random voxel grids as input.
dummy_data = np.random.rand(100, 32, 32, 32, 1)
# The target values are meaningless here - only the model's output matters -
# but they must match the shape of the final (latent) layer.
dumb_labels = np.random.rand(100, 100)
encoder.fit(x=dummy_data, y=dumb_labels, shuffle=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_23 (InputLayer)        (None, 32, 32, 32, 1)     0         
_________________________________________________________________
conv3d_56 (Conv3D)           (None, 30, 30, 30, 8)     224       
_________________________________________________________________
batch_normalization_96 (Batc (None, 30, 30, 30, 8)     32        
_________________________________________________________________
conv3d_57 (Conv3D)           (None, 15, 15, 15, 16)    3472      
_________________________________________________________________
batch_normalization_97 (Batc (None, 15, 15, 15, 16)    64        
_________________________________________________________________
conv3d_58 (Conv3D)           (None, 13, 13, 13, 32)    13856     
_________________________________________________________________
batch_normalization_98 (Batc (None, 13, 13, 13, 32)    128       
__________

<keras.callbacks.History at 0x7fda2657e1d0>

In [0]:
########################## DECODER NETWORK #################################
# Maps a 100-dimensional latent vector back to a 32x32x32x1 grid of per-voxel
# occupancy probabilities.
# Spatial size per stage: 7 -> 7 -> 15 -> 15 -> 32 -> 32.

# The 100-dimensional latent vector is the decoder's input
decoder_input = Input(shape=(100,))

# Layer 1, fully connected: 100 -> 343 (= 7*7*7)
decode_f1 = Dense(
    units=343, activation='elu', use_bias=True,
    kernel_regularizer=l2(l=0.01),
)(decoder_input)
decode_b1 = BatchNormalization()(decode_f1)

# Reshape the flat 343-vector to 7x7x7 with one trailing channel; the 3D
# convolutions need an explicit channel dimension to operate on.
# NOTE (???): open question from the original author - not sure this is the
# right way to go from the 1D vector to a 3D convolution, dimension-wise.
decode_reshape = Reshape((7, 7, 7, 1), input_shape=(343,))(decode_b1)

# Layer 2, plain convolution: keeps 7x7x7, expands to 64 channels
decode_c2 = Conv3D(
    filters=64, kernel_size=3, padding='same', activation='elu',
    kernel_regularizer=l2(l=0.01),
)(decode_reshape)
decode_b2 = BatchNormalization()(decode_c2)

# Layer 3, strided transposed convolution: upsamples 7 -> 15
decode_c3 = Conv3DTranspose(
    filters=32, kernel_size=3, strides=(2, 2, 2), padding='valid',
    activation='elu', kernel_regularizer=l2(l=0.01),
)(decode_b2)
decode_b3 = BatchNormalization()(decode_c3)

# Layer 4, transposed convolution: keeps 15x15x15
decode_c4 = Conv3DTranspose(
    filters=16, kernel_size=3, padding='same', activation='elu',
    kernel_regularizer=l2(l=0.01),
)(decode_b3)
decode_b4 = BatchNormalization()(decode_c4)

# Layer 5, strided transposed convolution with output padding: 15 -> 32
decode_c5 = Conv3DTranspose(
    filters=8, kernel_size=3, strides=(2, 2, 2), padding='valid',
    output_padding=1, activation='elu', kernel_regularizer=l2(l=0.01),
)(decode_b4)
decode_b5 = BatchNormalization()(decode_c5)

# Output layer: sigmoid gives the probability that each voxel is filled.
# This is the only decoder layer that is not followed by BatchNormalization.
decode_output = Conv3DTranspose(
    filters=1, kernel_size=3, padding='same', activation='sigmoid',
    kernel_regularizer=l2(l=0.01),
)(decode_b5)

decoder = Model(inputs=decoder_input, outputs=decode_output)

In [15]:
# Compile the decoder on its own and print its layer structure
decoder.compile(loss='logcosh', optimizer='adam', metrics=['accuracy'])
decoder.summary()

# Smoke-test the decoder on random data: 100 random latent vectors as input.
dummy_data = np.random.rand(100, 100)
# The target values are meaningless here - only the model's output matters -
# but they must match the shape of the final (voxel grid) layer.
dumb_labels = np.random.rand(100, 32, 32, 32, 1)
decoder.fit(x=dummy_data, y=dumb_labels, shuffle=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        (None, 100)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 343)               34643     
_________________________________________________________________
batch_normalization_36 (Batc (None, 343)               1372      
_________________________________________________________________
reshape_6 (Reshape)          (None, 7, 7, 7, 1)        0         
_________________________________________________________________
conv3d_14 (Conv3D)           (None, 7, 7, 7, 64)       1792      
_________________________________________________________________
batch_normalization_37 (Batc (None, 7, 7, 7, 64)       256       
_________________________________________________________________
conv3d_transpose_21 (Conv3DT (None, 15, 15, 15, 32)    55328     
__________

<keras.callbacks.History at 0x7fda2e05e7d0>

In [42]:
################################# AUTOENCODER ###############################
# Chain encoder -> decoder on the shared voxel input to get the end-to-end
# model: voxel grid in, reconstructed voxel grid out.
latent_code = encoder(voxel_input)
reconstruction = decoder(latent_code)
ae = Model(inputs=voxel_input, outputs=reconstruction)

ae.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_23 (InputLayer)        (None, 32, 32, 32, 1)     0         
_________________________________________________________________
model_24 (Model)             (None, 100)               2356500   
_________________________________________________________________
model_13 (Model)             (None, 32, 32, 32, 1)     111136    
Total params: 2,467,636
Trainable params: 2,422,566
Non-trainable params: 45,070
_________________________________________________________________


In [0]:
# Compile the autoencoder with Adam and the weighted binary cross-entropy
# loss (instead of the 'logcosh' loss used in the encoder/decoder smoke tests).
ae.compile(loss=lambda_binary_crossentropy, optimizer='adam')

# Training has not been run yet. Intended next step:
# ae.fit(data, epochs=100)
# or, in the form used by the smoke tests (labels don't matter for us):
# model.fit(data, data_labels, shuffle=True)
# TODO confirm: an autoencoder normally uses its own input as the target.

In [0]:
# OLD ENCODER NETWORK IMPLEMENTATION: (doesn't use functional keras)
# vae = Sequential()
# # First convolutional layer
# vae.add(Conv3D(8, kernel_size=3, activation='elu', padding='valid', 
#                    data_format='channels_last', input_shape=(32, 32, 32, 1))) 
# vae.add(BatchNormalization())

# # Second convolutional layer, downsampling through strided convolutions 
# # (default striding is 1x1x1, so up this to 2x2x2)
# vae.add(Conv3D(16, kernel_size=3, activation='elu', padding='same', strides=(2, 2, 2))) 
# vae.add(BatchNormalization())

# # # Third convolutional layer
# vae.add(Conv3D(32, kernel_size=3, activation='elu', padding='valid'))
# vae.add(BatchNormalization())

# # # Fourth convolutional layer, downsampling through strided convolutions
# vae.add(Conv3D(64, kernel_size=3, activation='elu', padding='same', strides=(2, 2, 2)))
# vae.add(BatchNormalization())

# # # Fully connected layer and latent layer
# # # Fully connected: Dense layer that takes in flattened output of previous layer 
# # # Latent layer: output of fully connected layer is 100 dimensional latent space
# vae.add(Flatten())
# vae.add(Dense(100, use_bias=True, activation='elu'))

# vae.compile(optimizer='adam', loss='logcosh', metrics=['accuracy'])
# vae.summary()


# OLD DECODER NETWORK IMPLEMENTATION
# # First layer of decoder
# vae.add(Dense(343, use_bias=True, activation='elu'))

# # Convolution layer that convolutes fully connected layer into 7x7x7
# vae.add(Conv1D(64, kernel_size=3, activation='elu', padding='same'))

In [0]:
# is now part of encoder, not separate
########################## LATENT LAYER #################################
# "All but the output layer are Batch Normalized" - interpreted here as: neither
# the sigmoid output layer nor the latent layer is Batch Normalized (uncertain ???)

# input of latent layer is last layer of encoder network
# latent_input = Input(shape=(21952,))

# # Sixth layer - LATENT LAYER: outputs 100
# latent = Dense(100, use_bias=True, activation='elu',
#                kernel_regularizer=l2(l=0.01))(latent_input)

# latent_layer = Model(inputs=latent_input, outputs=latent)

In [0]:
# Structure/info about latent layer
# latent_layer.compile(optimizer='adam', loss='logcosh', metrics=['accuracy'])
# latent_layer.summary()

# # Testing latent layer on dummy data
# dummy_data = np.random.rand(100, 21952)
# fake_labels = np.random.rand(100, 100) 
# latent_layer.fit(dummy_data, fake_labels, shuffle=True)