In [1]:
import numpy as np
import gc
# import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# import keras
import pickle
import math
from tensorflow.keras import backend as K
import sys
import cv2
import time

In [2]:
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon


In [3]:
def load_data(file_postfix):
    
    outfile = 'D:/meronym_data/X_train'+file_postfix+'.np'
    with open(outfile, 'rb') as pickle_file:
        X_train = pickle.load(pickle_file)

    outfile = 'D:/meronym_data/class_v'+file_postfix+'.np'
    with open(outfile, 'rb') as pickle_file:
        class_v = pickle.load(pickle_file)

    outfile = 'D:/meronym_data/masks_train'+file_postfix+'.np'
    with open(outfile, 'rb') as pickle_file:
        masks = pickle.load(pickle_file)

    outfile = 'D:/meronym_data/X_val'+file_postfix+'.np'
    with open(outfile, 'rb') as pickle_file:
        X_train_val = pickle.load(pickle_file)

    outfile = 'D:/meronym_data/class_v_val'+file_postfix+'.np'
    with open(outfile, 'rb') as pickle_file:
        class_v_val = pickle.load(pickle_file)

    outfile = 'D:/meronym_data/masks_val'+file_postfix+'.np'
    with open(outfile, 'rb') as pickle_file:
        masks_val = pickle.load(pickle_file)

#     outfile = 'D:/meronym_data/X_test'+part_data_post_fix+'.np'
#     with open(outfile, 'rb') as pickle_file:
#         X_test = pickle.load(pickle_file)

#     outfile = 'D:/meronym_data/X_test'+obj_data_postfix+'.np'
#     with open(outfile, 'rb') as pickle_file:
#         X_obj_test = pickle.load(pickle_file)
        
        
    return X_train, class_v, masks, X_train_val, class_v_val, masks_val

In [4]:
def shuffle_latent(a, b, c, d=None):
    p = np.random.permutation(len(a))
    if d is None:
        return a[p], b[p], c[p]
    return a[p], b[p], c[p], d[p]

def sampling(z_mean, z_log_var):
    epsilon = tf.random_normal(tf.shape(z_log_var), name="epsilon")
    return z_mean + epsilon * tf.exp(z_log_var)



def frange_cycle_linear(n_iter, start=0.0, stop=1.0,  n_cycle=4, ratio=0.5):
    L = np.ones(n_iter) * stop
    period = n_iter/n_cycle
    step = (stop-start)/(period*ratio)

    for c in range(n_cycle):
        v, i = start, 0
        while v <= stop and (int(i+c*period) < n_iter):
            L[int(i+c*period)] = v
            v += step
            i += 1
    return L

In [5]:
def reconstruction_loss(true_masks, pred_masks, z_mean, z_logvar, z_latent):
    
    kl_loss = tf.reduce_mean(0.5 * tf.reduce_sum(tf.square(z_mean) + tf.square(tf.exp(z_logvar)) - 2*(z_logvar) - 1,
                                                 axis=1))
    
    mask_loss = tf.reduce_mean(tf.keras.backend.binary_crossentropy(true_masks,pred_masks))

    return mask_loss, kl_loss
    

In [6]:
batch_size = 16
max_num_node = 16
latent_dims = 64

latent_inputs = keras.Input(shape=(latent_dims,))

true_maps = keras.Input(shape=([max_num_node, 64, 64, 1]), dtype=tf.float32)
true_masks = keras.Input(shape=([max_num_node, 64, 64, 1]), dtype=tf.float32)
true_edges = keras.Input(shape=([max_num_node, 64, 64, 1]), dtype=tf.float32)

true_bbxs = keras.Input(shape=([max_num_node, 4]), dtype=tf.float32)
cond_bbxs = keras.Input(shape=([max_num_node, 4]), dtype=tf.int32)

true_lbls = keras.Input(shape=([max_num_node, 1]), dtype=tf.float32)
cond_lbls = keras.Input(shape=([max_num_node, 1]), dtype=tf.float32)

true_classes = keras.Input(shape=([7]), dtype=tf.float32)
cond_classes = keras.Input(shape=([7]), dtype=tf.float32)


rnn_bbxs = layers.Bidirectional(layers.GRU(4, return_sequences=True))(true_bbxs)
concatenated_bbx_lbl = rnn_bbxs
dense_cond = layers.Dense(64, activation='tanh')(true_classes)
enc = layers.TimeDistributed(layers.Conv2D(8, kernel_size=3))(true_masks)
enc = layers.TimeDistributed(layers.BatchNormalization(trainable = False))(enc)
enc = layers.TimeDistributed(layers.Activation('relu'))(enc)

enc = layers.TimeDistributed(layers.Conv2D(16, kernel_size=3))(enc)
enc = layers.TimeDistributed(layers.BatchNormalization(trainable = False))(enc)
enc = layers.TimeDistributed(layers.Activation('relu'))(enc)

enc = layers.TimeDistributed(layers.MaxPooling2D(pool_size=(2, 2)))(enc)

enc = layers.TimeDistributed(layers.Conv2D(32, kernel_size=3, activation='relu'))(enc)
enc = layers.TimeDistributed(layers.BatchNormalization(trainable = False))(enc)
enc = layers.TimeDistributed(layers.Activation('relu'))(enc)
enc = layers.TimeDistributed(layers.Flatten())(enc)
TDD = layers.TimeDistributed(layers.Dense(64, activation='relu', name = 'encoded_bitmaps'))
dense_enc_maps = TDD(enc)

BGRU = layers.Bidirectional(layers.GRU(32, return_sequences=True))
rnn_maps = BGRU(dense_enc_maps)

D = layers.Dense(64, activation='tanh')
attention = D(concatenated_bbx_lbl)
sent_representation = layers.Multiply()([rnn_maps, attention])
sent_representation = layers.Multiply()([sent_representation, dense_cond])
images_with_attention = layers.Lambda(lambda xin: K.sum(xin, axis=-2),
                                            output_shape=(128,))(sent_representation)

z_mean = layers.Dense(64, activation='tanh')(images_with_attention)
z_log_var = layers.Dense(64, activation='tanh', name='z_logvar')(images_with_attention)

# z_latent = z_mean #sampling(z_mean, z_log_var)
z_latent = Sampling()([z_mean, z_log_var])
encoder = keras.Model(inputs=[true_bbxs, true_classes, true_masks],
                      outputs=[z_mean, z_log_var, z_latent], 
                      name='encoder')

# Decoder
cond_bbx = layers.Lambda(lambda xin: K.sum(xin, axis=-1), output_shape=(4,))(cond_bbxs)
cond_cat = cond_bbx

cond_fully_cat = layers.Dense(64, activation='relu')(cond_cat)
cond_class_ = layers.Dense(64, activation='relu')(cond_classes)
conditioned_z = layers.concatenate([cond_fully_cat, latent_inputs], axis=-1, name='conditioned_z_1')
conditioned_z = layers.concatenate([conditioned_z, cond_class_], axis=-1, name='conditioned_z_2')
decoded = layers.RepeatVector(max_num_node)(conditioned_z)
decoded = layers.Bidirectional(layers.GRU(32, return_sequences=True))(decoded)
dec_dense = layers.TimeDistributed(layers.Dense(12544, activation='relu',  name = 'encoding'))(decoded)
dec_conv = layers.TimeDistributed(layers.Reshape((28, 28, 16)))(dec_dense)

dec = layers.TimeDistributed(layers.Conv2DTranspose(16, kernel_size=3, padding='same'))(dec_conv)
dec = layers.TimeDistributed(layers.BatchNormalization(trainable = False))(dec)
dec = layers.TimeDistributed(layers.Activation('relu'))(dec)

dec = layers.TimeDistributed(layers.Conv2DTranspose(8, kernel_size=3))(dec)
dec = layers.TimeDistributed(layers.BatchNormalization(trainable = False))(dec)
dec = layers.TimeDistributed(layers.Activation('relu'))(dec)

dec = layers.TimeDistributed(layers.UpSampling2D(size=(2, 2)))(dec)

dec = layers.TimeDistributed(layers.Conv2DTranspose(8, kernel_size=3))(dec)
dec = layers.TimeDistributed(layers.BatchNormalization(trainable = False))(dec)
dec = layers.TimeDistributed(layers.Activation('relu'))(dec)

decoder_bitmaps = layers.TimeDistributed(layers.Conv2DTranspose(1, kernel_size=3,
                                                                            activation='sigmoid', 
                                                                            name = 'decoded_mask'))(dec)
decoder = keras.Model(inputs=[cond_bbxs, cond_classes, latent_inputs],
                      outputs=decoder_bitmaps, 
                      name='decoder')

In [7]:
encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 16, 64, 64,  0                                            
__________________________________________________________________________________________________
time_distributed (TimeDistribut (None, 16, 62, 62, 8 80          input_3[0][0]                    
__________________________________________________________________________________________________
time_distributed_1 (TimeDistrib (None, 16, 62, 62, 8 32          time_distributed[0][0]           
__________________________________________________________________________________________________
time_distributed_2 (TimeDistrib (None, 16, 62, 62, 8 0           time_distributed_1[0][0]         
____________________________________________________________________________________________

In [15]:
class maskVAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.train_mask_loss = keras.metrics.Mean(name="train_mask_loss")
        self.val_mask_loss = keras.metrics.Mean(name="val_mask_loss")
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        self.kl_weight = 0

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.train_mask_loss,
            self.kl_loss_tracker,
        ]
    
    def reconstruction_loss(self, true_masks, pred_masks, z_mean, z_logvar, z_latent):
    
        kl_loss = tf.reduce_mean(0.5 * tf.reduce_sum(tf.square(z_mean) + tf.square(tf.exp(z_logvar)) - 2*(z_logvar) - 1,
                                                     axis=1))
        mask_loss = tf.reduce_mean(tf.keras.backend.binary_crossentropy(true_masks,pred_masks))

        return mask_loss, kl_loss

    def train_step(self, data):
        print(data)
        (bx, cls, mask) = data[0]
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z_latent = self.encoder(data)
            pred_mask = self.decoder((bx, cls, z_latent))
            mask_loss, kl_loss = self.reconstruction_loss(mask, pred_mask, z_mean, z_logvar, z_latent)
            total_loss = mask_loss + kl_loss*kl_weight
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.mask_loss.update_state(mask_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        if (kl_loss > 10.0 
            and abs(self._train_mask_loss.result() - self.val_mask_loss.result()) < 0.1 
            and self.kl_weight<0.5):
            self.kl_weight += 0.01 
        return {
            "loss": self.total_loss_tracker.result(),
            "mask_loss": self.mask_loss.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }
    
    def val_step(self, data):
        z_mean, z_log_var, z_latent = self.encoder(data)
        (bx, cls, mask) = data[0]
        pred_mask = self.decoder((bx, cls, z_latent))
        mask_loss, kl_loss = self.reconstruction_loss(mask, pred_mask, z_mean, z_logvar, z_latent)
        total_loss = mask_loss + kl_loss*kl_weight
        self.val_mask_loss.update_state(mask_loss)
        return {
            "val_mask_loss": self.total_loss_tracker.result()
        }


In [16]:
lr = 0.00001
file_postfix = '_combined_mask_data'

mask_vae_model = maskVAE(encoder, decoder)
X_train, class_v, masks, X_train_val, class_v_val, masks_val = load_data(file_postfix)
# train_dataset = tf.data.Dataset.from_tensor_slices((X_train[:,:,1:], class_v, masks))
# val_dataset = tf.data.Dataset.from_tensor_slices((X_train_val[:,:,1:], class_v_val, masks_val))

ckpt = [tf.keras.callbacks.ModelCheckpoint(
    filepath="D:/meronym_data/runs/mask_generation_model_tf2_reconstruction/lr00001/maskvae.ckpt",
    save_freq=10
)]

mask_vae_model.compile(optimizer=keras.optimizers.Adam(lr))
mask_vae_model.fit(((X_train[:,:,1:], class_v, masks)), epochs=200, batch_size=16, callbacks=ckpt, validation_data=(X_train_val[:,:,1:], class_v_val, masks_val), shuffle=True)
mask_vae_model.save("D:/meronym_data/runs/mask_generation_model_tf2_reconstruction/lr00001/maskvae_final")

Epoch 1/200
((<tf.Tensor 'IteratorGetNext:0' shape=(None, 16, 4) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(None, 7) dtype=float32>, <tf.Tensor 'IteratorGetNext:2' shape=(None, 16, 64, 64, 1) dtype=float32>),)


NameError: in user code:

    C:\Users\user\miniconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:800 train_function  *
        return step_function(self, iterator)
    C:\Users\user\miniconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:790 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\user\miniconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\user\miniconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\user\miniconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\user\miniconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:783 run_step  **
        outputs = model.train_step(data)
    C:\Users\user\AppData\Local\Temp\ipykernel_18772\2717656065.py:34 train_step
        mask_loss, kl_loss = self.reconstruction_loss(true_masks, pred_masks, z_mean, z_logvar, z_latent)

    NameError: name 'pred_masks' is not defined


In [10]:
train_dataset

<TensorSliceDataset shapes: ((16, 4), (7,), (16, 64, 64, 1)), types: (tf.float64, tf.float64, tf.float64)>

In [None]:
maskVAE = keras.models.Model(inputs=[true_masks, true_bbxs, true_classes, cond_bbxs, cond_classes],
                               outputs=[decoder_bitmaps, z_mean, z_log_var, z_latent])

maskVAE.summary()

In [None]:

lr = 0.00001
X_train, class_v, masks, X_train_val, class_v_val, masks_val = load_data(file_postfix)
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, class_v, masks))
val_dataset = tf.data.Dataset.from_tensor_slices((X_train_val, class_v_val, masks_val))

train_dataset = train_dataset.shuffle(100).batch(batch_size)
val_dataset = val_dataset.batch(batch_size)

labels = X_train[:,:,:1] 
bounding_boxes = X_train[:,:,1:]
masks = masks
classes = class_v

labels_val = X_train_val[:,:,:1] 
bounding_boxes_val = X_train_val[:,:,1:]
masks_val = masks_val
classes_val = class_v_val

opt = keras.optimizers.Adam(learning_rate=lr)
# maskVAE.compile(optimizer=opt)
nb_train = masks.shape[0]
n_epochs = 200
klw = 0.00001

for epoch in range(n_epochs):
  epoch_train_reconstruction_loss_avg = keras.metrics.Mean() # Keeping track of the training loss
  nb_batches = nb_train // batch_size
  start_time=time.time()
  for step, (x_in, cls_in, mx_in) in enumerate(train_dataset):
    labels = tf.cast(x_in[:,:,:1],dtype=tf.float32) 
    bx_in = tf.cast(x_in[:,:,1:],dtype=tf.float32)
    cls_in = tf.cast(cls_in, dtype=tf.float32)
    mx_in = tf.cast(mx_in, dtype=tf.float32)
    
    with tf.GradientTape() as tape: # Forward pass
      pred_masks, z_mean, z_logvar, z_latent = maskVAE([mx_in, bx_in, cls_in, bx_in, cls_in],
                                                       training=True)
      mask_loss, kl_loss = reconstruction_loss(mx_in, pred_masks, z_mean, z_logvar, z_latent)
      loss = mask_loss+klw*kl_loss
      
    grad = tape.gradient(loss, maskVAE.trainable_weights) # Backpropagation
    opt.apply_gradients(zip(grad, maskVAE.trainable_weights)) # Update network weights

    epoch_train_reconstruction_loss_avg(loss)
    
#   loss_train[epoch] = epoch_train_reconstruction_loss_avg.result()

  for step, (x_in, cls_in, mx_in) in enumerate(val_dataset): 
    labels = tf.cast(x_in[:,:,:1],dtype=tf.float32) 
    bx_in = tf.cast(x_in[:,:,1:],dtype=tf.float32)
    cls_in = tf.cast(cls_in, dtype=tf.float32)
    mx_in = tf.cast(mx_in, dtype=tf.float32)
    
    pred_masks, z_mean, z_logvar, z_latent = maskVAE([mx_in, bx_in, cls_in, bx_in, cls_in],
                                                     training=False) # Validation predictions
    mask_loss_val, kl_loss_val = reconstruction_loss(mx_in, pred_masks, z_mean, z_logvar, z_latent)
    break

  print('epoch:',epoch,
        'kl_weight', klw,
        'kl:', kl_loss .numpy(), 
        'train:', epoch_train_reconstruction_loss_avg.result().numpy(),
        'val:',mask_loss_val.numpy(),
        'time:', time.time()-start_time)
    
  if kl_loss > 10.0 and abs(mask_loss - mask_loss_val) < 0.1 and klw<0.5:
    klw += 0.01
  if epoch % 10 == 0:
    maskVAE.save("D:/meronym_data/runs/mask_generation_model_tf2_reconstruction/lr00001/maskvae.ckpt")

In [None]:
epoch_train = keras.metrics.Mean()
if not abs(epoch_train.result()-epoch_train.result())<0:
    print(8)