Downloading the Pix3D dataset from its URL. The dataset will be stored in this session's memory. The whole download should take about 2 minutes, depending on the speed of the internet connection.

In [None]:
# Fetch the Pix3D archive, unpack it into the current directory and delete
# the zip once it has been extracted successfully.
!wget http://pix3d.csail.mit.edu/data/pix3d.zip
!unzip pix3d.zip && rm pix3d.zip

Importing the libraries necessary for loading the data.
The dataset contains .jpg or .png images, .txt keypoint lists, .obj CAD models, .mtl material files and .mat MATLAB voxel models. All the information needed for loading is stored in the pix3d.json file.

In [None]:
# scipy for .mat to numpy conversion,
# numpy for mathematical and array operations, PIL for images,
# Path for work with paths and json for .json file reading
import scipy
import numpy as np
import PIL
from PIL import Image

from pathlib import Path
import json

import tensorflow as tf
from tensorflow.keras import layers

# Connection to google drive if needed
#from google.colab import drive
#drive.mount('/content/drive')

Functions needed for creating a training set. Comments inside the cell.

In [None]:
# data that are downloaded during the session are stored in /content;
# the trailing slash matters because file names are appended by plain
# string concatenation
data_path = '/content/'
###

# reads the image and voxel-model paths listed in the dataset's json index
def load_paths_from_json(json_file="pix3d.json"):
    """Collect the image and voxel paths of every entry in the json index.

    Args:
        json_file: name of the index file; it is appended to the
            module-level ``data_path``.

    Returns:
        Tuple ``(img_paths, voxel_paths)``: two lists, in file order, built
        from the ``'img'`` and ``'voxel'`` keys of each entry.
    """
    with open(data_path + json_file, "r") as handle:
        entries = json.load(handle)

    img_paths = [entry['img'] for entry in entries]
    voxel_paths = [entry['voxel'] for entry in entries]
    return img_paths, voxel_paths

# converting .mat voxels to numpy array voxels
def voxel_mat2np(path):
    """Load the voxel grid stored in a MATLAB ``.mat`` file.

    Args:
        path: location of the ``.mat`` file.

    Returns:
        The array stored under the ``'voxel'`` variable of the file.

    Raises:
        KeyError: if the file contains no ``'voxel'`` variable.
    """
    # Import the submodule explicitly: a bare ``import scipy`` is not
    # guaranteed to make ``scipy.io`` available on every SciPy release.
    from scipy.io import loadmat

    mat = loadmat(path)
    return mat['voxel']

# reshaping voxel model from 128x128x128 to 32x32x32
def reshape_vox(vox):
    """Down-sample a voxel grid to 32x32x32 with a 4x4x4 max-pool.

    A coarse cell is occupied as soon as any of the 64 fine cells it covers
    is occupied. The pooling is done directly in NumPy, so no Keras layers
    have to be created for every sample.

    Args:
        vox: 3-D array-like voxel grid whose every side pools down to 32
            cells (i.e. 128 to 131 cells per side; cells beyond the first
            128 are ignored, as with non-padded pooling).

    Returns:
        ``uint8`` NumPy array of shape ``(1, 32, 32, 32)``; the leading axis
        is a batch axis of length one.

    Raises:
        ValueError: if ``vox`` is not 3-D or a side does not pool to 32.
    """
    pool = 4
    side = 32

    vox = np.asarray(vox)
    if vox.ndim != 3 or any(dim // pool != side for dim in vox.shape):
        raise ValueError(
            "expected a 3-D voxel grid with %d-%d cells per side, got shape %s"
            % (pool * side, pool * side + pool - 1, vox.shape)
        )

    span = pool * side
    # Split every axis into (coarse index, offset inside the pooling window)
    # and reduce over the three offset axes.
    blocks = vox[:span, :span, :span].reshape(side, pool, side, pool, side, pool)
    voxels_32 = blocks.max(axis=(1, 3, 5))

    return voxels_32[np.newaxis].astype('uint8')


# loading image-voxel pairs from paths to them
def load_img_voxel(img_path, voxel_path, path=data_path):
    """Load one image together with its down-sampled voxel model.

    Args:
        img_path: image path relative to ``path``.
        voxel_path: ``.mat`` voxel path relative to ``path``.
        path: base directory prefix (including the trailing slash) that is
            prepended to both relative paths.

    Returns:
        Tuple ``(array, voxel)`` where ``array`` is the image resized to
        256x256 as a ``(256, 256, 3)`` array and ``voxel`` is the result of
        ``reshape_vox``; ``None`` when the resized image does not have
        exactly three channels, so callers must check for ``None``.
    """
    # The context manager closes the underlying file once the pixels have
    # been copied into the array.
    with Image.open(path + img_path) as img:
        array = np.array(img.resize((256, 256)))

    # Reject unusable images before paying for the voxel load.
    if np.shape(array) != (256, 256, 3):
        return None

    voxel = reshape_vox(voxel_mat2np(path + voxel_path))
    return array, voxel

Because a single Colab session cannot hold all of the loaded arrays in RAM at once, only 1000 randomly selected image-voxel pairs are loaded for the purpose of this code presentation.

In [None]:
import random
# size of the index range to draw from (hard-coded here rather than read
# from the loaded data) and the number of samples to keep
n = 10069
sample_size = 1000

# fixed seed, so the same indices are drawn on every run
random.seed(42)
population = range(n)
indx = random.sample(population, sample_size)
print(indx)

[1824, 409, 4506, 4012, 3657, 2286, 1679, 8935, 1424, 9674, 6912, 520, 488, 1535, 3582, 3811, 8279, 9863, 434, 9195, 3257, 8928, 6873, 3611, 7359, 9654, 4557, 106, 2615, 6924, 5574, 4552, 2547, 3527, 5514, 1674, 1519, 6224, 1584, 5881, 5635, 9891, 4333, 711, 7527, 8785, 2045, 6201, 1291, 9044, 4803, 5925, 9459, 3150, 1139, 750, 3733, 4741, 1307, 3814, 1654, 6227, 4554, 7428, 5977, 2664, 6065, 5820, 3432, 4374, 1169, 9980, 2803, 8751, 4010, 2677, 7573, 6216, 4422, 9125, 3598, 5313, 916, 3752, 525, 5168, 6572, 4386, 1084, 3456, 9292, 5155, 3483, 8179, 6482, 7517, 2340, 4339, 2287, 4040, 9197, 8830, 4304, 9577, 7019, 9560, 6543, 5930, 3593, 2266, 8348, 8085, 1489, 771, 1796, 2504, 2621, 6916, 9771, 1040, 6304, 6252, 9763, 7668, 8669, 4119, 9064, 188, 1876, 8797, 4371, 5573, 1827, 4808, 7123, 2591, 7433, 53, 4315, 8201, 2927, 8317, 1743, 4889, 9977, 3258, 6126, 2646, 8837, 8689, 9, 9813, 5310, 8005, 319, 1832, 5947, 5038, 3923, 949, 3946, 9295, 1290, 1403, 7962, 1133, 8727, 2060, 2103, 778

Calling the functions from above to create the training set.

In [None]:
# loading paths to images and voxel models
i_paths, v_paths = load_paths_from_json()

# keeping only the randomly selected samples
i_paths = [i_paths[i] for i in indx]
v_paths = [v_paths[i] for i in indx]

arrays = []
voxels = []

for i, v in zip(i_paths, v_paths):
    result = load_img_voxel(i, v)
    # load_img_voxel returns None for images it cannot use; such samples are
    # skipped, so nothing from a previous iteration is appended a second time
    if result is None:
        continue
    array, voxel = result
    arrays.append(array)
    voxels.append(voxel)

# printing the final number of loaded samples; both numbers are always equal
# and are lower than the number of selected indices when images were skipped
print(len(arrays))
print(len(voxels))

# making arrays from the lists; -1 lets numpy infer the number of samples and
# the reshape drops the length-one axis carried by every voxel model
images = np.stack(arrays, axis=0)
voxels = np.stack(voxels, axis=0).reshape((-1, 32, 32, 32))

# this can save loaded images and voxels as arrays to Google drive, so it is done for next run
#save_path = '/content/drive/MyDrive/' + rest of the path in your drive
#np.save(save_path + 'imgs_as_arrays.npy', np.stack(arrays, axis=0))
#np.save(save_path + 'voxels_as_arrays.npy', np.stack(voxels, axis=0))

1000
1000


Now we have our train set ready for a single-view reconstruction test.

Now we'll import additional libraries for model training/testing.

In [None]:
import numpy as np
from matplotlib import pyplot as plt
import random

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.metrics import MeanAbsoluteError, RootMeanSquaredError, MeanSquaredError

from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model

from datetime import datetime
import pytz

import os

Splitting the dataset into train, test and validation parts. The train set has 80% of the data, and the test and validation sets each have 10% of the dataset.

Splitting the dataset like this is common in machine learning and has proven effective.

The random_state argument is for having the same random seed every time, so the dataset is split randomly, but the same each time it is run. It can be any positive integer.

In [None]:
# 80 % of the samples go to training; the remaining 20 % are halved into the
# validation and test sets
X_train, X_rem, y_train, y_rem = train_test_split(
    images, voxels, train_size=0.8, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_rem, y_rem, test_size=0.5, random_state=42)

# the full arrays and the intermediate remainder are not needed any more
del images, voxels, X_rem, y_rem

# shapes of all six parts, in train / validation / test order
for part in (X_train, y_train, X_valid, y_valid, X_test, y_test):
    print(np.shape(part))

(800, 256, 256, 3)
(800, 32, 32, 32)
(100, 256, 256, 3)
(100, 32, 32, 32)
(100, 256, 256, 3)
(100, 32, 32, 32)


The crucial part: defining our model's architecture in the model variable. This is model 3, with working name 2404131212.

In [None]:
# Encoder-decoder network: a 2D convolutional encoder turns the 256x256 RGB
# image into a 16x16x256 feature map, which is reinterpreted as a thin 3D
# volume and expanded by the 3D decoder into a 32x32x32 occupancy grid.
model = tf.keras.Sequential([
        layers.Input(shape = (256, 256, 3)),

        # encoder: every MaxPool2D(2) halves the spatial size, 256 -> 128 -> 64 -> 32 -> 16
        layers.Conv2D(32, kernel_size=15, padding='same',strides=1, activation='relu'),
        layers.Conv2D(32, kernel_size=9, padding='same',strides=1, activation='relu'),
        layers.MaxPool2D(2),
        layers.BatchNormalization(),
        layers.Conv2D(64, kernel_size=7, padding='same',strides=1, activation='relu'),
        layers.Conv2D(64, kernel_size=7, padding='same', strides=1, activation='relu'),
        layers.MaxPool2D(2),
        layers.BatchNormalization(),
        layers.Conv2D(128, kernel_size=3, padding='same', strides=1, activation='relu'),
        layers.Conv2D(128, kernel_size=3, padding='same', strides=1, activation='relu'),
        layers.MaxPool2D(2),
        layers.BatchNormalization(),
        layers.Conv2D(256, kernel_size=3, padding='same', strides=1, activation = 'relu'),
        layers.Conv2D(256, kernel_size=3, padding='same', strides=1, activation = 'sigmoid'),
        layers.MaxPool2D(2),
        layers.BatchNormalization(),

        # (16, 16, 256) feature map -> 3D volume with a depth of one
        layers.Reshape((16, 16, 1, 256)),

        # decoder: depth 1 -> 8, then (16, 16, 8) -> (32, 32, 32)
        layers.Conv3DTranspose(128, kernel_size=(3,3,3), padding='same',strides=1, activation='relu'),
        layers.Conv3DTranspose(128, kernel_size=(3,3,3), padding='same',strides=1, activation='relu'),
        layers.BatchNormalization(),
        layers.UpSampling3D((1,1,8)),
        layers.Conv3DTranspose(64, kernel_size=(5,5,5), padding='same',strides=1, activation='relu'),
        layers.Conv3DTranspose(64, kernel_size=(5,5,5), padding='same',strides=1, activation='relu'),
        layers.BatchNormalization(),
        layers.UpSampling3D((2,2,4)),

        # one sigmoid output per voxel, then the channel axis of size one is dropped
        layers.Conv3D(1, kernel_size=1, activation='sigmoid', padding='same'),
        layers.Reshape((32, 32, 32))

])
# Adam optimizer is used for model and BCE as both loss function and metric;
# metrics is passed as a list because newer Keras versions reject a bare string
model.compile(optimizer='adam', loss=losses.BinaryCrossentropy(), metrics=['binary_crossentropy'])
# summary function will show us what the model looks like and how the parameters are distributed,
# also the shape of output of each layer is showed, and the size of the model at the end
model.summary()

Now the most fun part is the training of the model.

The following cells will:

1) Train the model, using train and validation data, for 50 epochs, with batch size of 32, and save the information about training into history variable

2) From the history variable we can now draw a plot of the train and validation loss, to see how the training progressed and whether there is a need for further training or for training the whole model again for fewer epochs.

3) The time when the training finished will be written into the variable date_time, and

4) the model, with weights, biases and the architecture will be saved in the memory of this session as .keras format (https://www.tensorflow.org/tutorials/keras/save_and_load).

In [None]:
# train for 50 epochs in shuffled mini-batches of 32 samples; the validation
# split is passed along so its loss is recorded in the returned history too
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=50,
    batch_size=32,
    shuffle=True,
)

In [None]:
# The first epochs are left out of the plot (presumably because their much
# larger losses would flatten the rest of the curve).
skipped_epochs = 10
train_loss = history.history['loss'][skipped_epochs:]
valid_loss = history.history['val_loss'][skipped_epochs:]

# x values are the real (1-based) epoch numbers, not positions in the slice
epoch_numbers = range(skipped_epochs + 1, skipped_epochs + 1 + len(train_loss))

plt.plot(epoch_numbers, train_loss, label='loss')
plt.plot(epoch_numbers, valid_loss, label='val_loss')
plt.xlabel('epoch')
plt.ylabel('loss')
# without the legend the two labels above would never be displayed
plt.legend()
plt.show()

In [None]:
from datetime import datetime
import pytz

# current time in the Europe/Prague timezone, formatted as YYMMDDHHMM;
# the string is used as the file name of the saved model
CET = pytz.timezone('Europe/Prague')
now = datetime.now(tz=CET)
date_time = f"{now:%y%m%d%H%M}"
print(date_time)

In [None]:
# the file is named after the timestamp stored in date_time
save_target = '/content/' + date_time + '.keras'
model.save(save_target)

However, to just evaluate the model, we can upload the already trained model 1 into this session and load it with the following cell. The summary method will show us that the model was loaded as it should be.

Note: the model can be saved or loaded easily from Google Drive (if the Drive is mounted to the session)

In [None]:
# path of the uploaded, already trained model inside this session
trained_model_path = '/content/2404131212.keras'
model = tf.keras.models.load_model(trained_model_path)
# printing the architecture lets us check that the load went as expected
model.summary()

Now let's evaluate the model.

First, we define the metrics that will show us the performance of the model.

In [None]:
def MAE(X, y):
    """Print and return the mean absolute error between ``X`` and ``y``.

    Both arguments are wrapped in a list before being handed to the Keras
    ``MeanAbsoluteError`` metric, whose result is returned as a NumPy scalar.
    """
    metric = tf.keras.metrics.MeanAbsoluteError()
    metric.update_state([X], [y])
    value = metric.result().numpy()
    print('MAE = ', value)
    return value

def STD(X, y):
    """Print and return the standard deviation of the error ``X - y``.

    Args:
        X: array-like of predicted values.
        y: array-like of reference values, same shape as ``X``.

    Returns:
        Standard deviation of the element-wise difference, rounded to ten
        decimal places.
    """
    # Subtract in floating point: unsigned integer inputs (e.g. uint8 voxels)
    # would otherwise wrap around, turning 0 - 1 into 255.
    difference = np.asarray(X, dtype=np.float64) - np.asarray(y, dtype=np.float64)
    std = np.round(np.std(difference), 10)
    print('STD = ', std)
    return std

def BCE(X, y):
    """Print and return the binary cross-entropy of predictions ``X``.

    Args:
        X: predicted voxels (the notebook calls this helper with the
            thresholded predictions as the first argument).
        y: ground-truth voxels.

    Returns:
        Result of the Keras ``BinaryCrossentropy`` metric as a NumPy scalar.
    """
    bce = tf.keras.metrics.BinaryCrossentropy()
    # Keras metrics take (y_true, y_pred): the ground truth goes first.
    bce.update_state([y], [X])
    value = bce.result().numpy()
    print('BCE = ', value)
    return value


Here, the test data are fed into the model, which predicts (with the predict() method) the voxels as floating-point values between 0 and 1 (32-bit floats under Keras's default settings).

Then the threshold is applied to convert the float data into 8-bit integer voxels.

In [None]:
# run the trained network on the held-out test images
test_pred = model.predict(X_test)
# every voxel scoring above 0.5 becomes 1, all others 0, stored as uint8
test_pred_tresholded = (test_pred > 0.5).astype('uint8')
# the raw float predictions are not needed any more
del test_pred

Here we use the metric functions defined earlier to quantify the model's performance. The thresholded predicted voxels and the ground truth voxels from the test set are used for the evaluation. Again, the test set was not seen by the model during training, so it serves as an independent control.

In [None]:
# Each helper prints its metric and also returns it. The calls are kept as
# separate statements: in a notebook the value of the last expression (the
# BCE result) is additionally shown as the cell's output.
MAE(test_pred_tresholded, y_test)
STD(test_pred_tresholded, y_test)
BCE(test_pred_tresholded, y_test)

And finally, the qualitative evaluation of the model with graphical display.

In the top row there are test images, in the middle row the voxels predicted by the model, and in the bottom row the corresponding ground truth voxels for comparison.

More of the test set can be seen by changing the k parameter at the top to another non-negative integer and simply running the cell again.

In [None]:
# Change this variable to any non-negative integer to page through the test set
k = 0

# number of samples (columns) shown at once and index of the first one
n = 4
m = k*n

# never index past the end of the test set when k points at the last page
shown = max(0, min(n, len(X_test) - m))

fig = plt.figure(figsize=(20, 20))
for i in range(shown):
    # top row: input image
    ax = fig.add_subplot(3, n, i + 1)
    ax.imshow(X_test[i+m])

    # middle row: voxels predicted by the model
    ax = fig.add_subplot(3, n, i + n + 1, projection='3d')
    ax.voxels(test_pred_tresholded[i+m], edgecolor='k')

    # bottom row: ground-truth voxels
    ax = fig.add_subplot(3, n, i + 2*n + 1, projection='3d')
    ax.voxels(y_test[i+m], edgecolor='k')
plt.show()