# Обучение различных FCRN с шорткатами из энкодера в декодер с адаптивным лоссом

# Загрузка датасета

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [2]:
import tensorflow
import keras
import h5py
import os
import numpy as np

Using TensorFlow backend.


In [3]:
# Close the handle left over from a previous run of this cell (if any) so the
# file is not opened twice when the cell is re-executed.
try:
    train_data_file.close()
except NameError:
    # First execution: no handle has been created yet.
    pass
# Open read-only explicitly: the data is only read here, and the default mode
# of h5py.File is not the same across h5py versions.
train_data_file = h5py.File('/home/kmouraviev/NYU_dataset_hdf5/train_data_fullsize_small.hdf5', 'r')
# np.array(...) materialises each HDF5 dataset as an in-memory array.
rgbs_train = np.array(train_data_file['data'])
depths_train = np.array(train_data_file['label'])

In [4]:
# Close the handle left over from a previous run of this cell (if any) so the
# file is not opened twice when the cell is re-executed.
try:
    val_data_file.close()
except NameError:
    # First execution: no handle has been created yet.
    pass
# Open read-only explicitly: the data is only read here, and the default mode
# of h5py.File is not the same across h5py versions.
val_data_file = h5py.File('/home/kmouraviev/NYU_dataset_hdf5/validation_data_different_scenes.hdf5', 'r')
# np.array(...) materialises each HDF5 dataset as an in-memory array.
rgbs_val = np.array(val_data_file['data'])
depths_val = np.array(val_data_file['label'])

In [5]:
# Sanity check: array shapes of both splits, then the value range of the
# first training image and of the first training depth map.
for images, depth_maps in ((rgbs_train, depths_train), (rgbs_val, depths_val)):
    print(images.shape, depth_maps.shape)
for first_sample in (rgbs_train[0], depths_train[0]):
    print(first_sample.min(), first_sample.max())

(41685, 384, 512, 3) (41685, 384, 512)
(14266, 384, 512, 3) (14266, 384, 512)
-122.68 150.061
1.74775976467378 4.2530741218842865


# ResNet с Deconvolution-декодером и шорткатами

## Создание модели

In [6]:
from tensorflow_resnet.my_batch_normalization import MyBatchNormalization

ModuleNotFoundError: No module named 'tensorflow_resnet'

In [None]:
from keras.models import Model, load_model
from keras.layers import *
import keras.backend as K
import tensorflow as tf
from keras.applications.resnet50 import ResNet50

In [None]:
# Reset the Keras backend state before the session and the model are (re)built below.
K.clear_session()

In [None]:
# Build the TF session used by both Keras and the hand-written training code:
# cap this process at 30% of GPU memory and allocate it on demand.
config = tf.ConfigProto()
gpu_settings = config.gpu_options
gpu_settings.per_process_gpu_memory_fraction = 0.3
gpu_settings.allow_growth = True
session = tf.Session(config=config)
# Make Keras use this session instead of creating its own.
K.set_session(session)

In [None]:
def apply_upprojection(nn, n_output_filters=None):
    """Upsample `nn` by 2x with a two-branch (residual-style) block.

    Layout: BatchNorm -> UpSampling2D, then a main branch
    (3x3 conv -> ReLU -> 3x3 conv) and a projection branch (3x3 conv) that are
    summed, followed by ReLU and Dropout(0.5).

    Args:
        nn: input Keras tensor (channels-last; the channel count is read from
            the last axis).
        n_output_filters: number of filters of every convolution in the block.
            Defaults to half of the input channels, which is what the
            one-argument form of this function always used. The parameter
            exists because `create_fcrn_model` calls this function with an
            explicit filter count.

    Returns:
        The output Keras tensor of the block.
    """
    if n_output_filters is None:
        n_output_filters = int(nn.shape[-1]) // 2
    batchnorm = BatchNormalization()(nn)
    upsampling = UpSampling2D()(batchnorm)
    # Main branch.
    conv = Conv2D(n_output_filters, kernel_size=3, padding='same')(upsampling)
    relu = Activation('relu')(conv)
    conv_3 = Conv2D(n_output_filters, kernel_size=3, padding='same')(relu)
    # Projection branch, applied to the same upsampled tensor.
    conv_proj = Conv2D(n_output_filters, kernel_size=3, padding='same')(upsampling)
    add = Add()([conv_3, conv_proj])
    relu_2 = Activation('relu')(add)
    dropout = Dropout(0.5)(relu_2)
    return dropout


def apply_upconvolution(nn, n_output_filters=None):
    """Upsample `nn` by 2x: BatchNorm -> UpSampling2D -> 3x3 conv -> ReLU -> Dropout(0.5).

    Args:
        nn: input Keras tensor (channels-last; the channel count is read from
            the last axis).
        n_output_filters: number of filters of the convolution. Defaults to
            half of the input channels, which is what the one-argument form of
            this function always used. The parameter exists because
            `create_fcrn_model` calls this function with an explicit filter
            count.

    Returns:
        The output Keras tensor of the block.
    """
    if n_output_filters is None:
        n_output_filters = int(nn.shape[-1]) // 2
    batchnorm = BatchNormalization()(nn)
    upsampling = UpSampling2D()(batchnorm)
    conv = Conv2D(n_output_filters, kernel_size=3, padding='same')(upsampling)
    relu = Activation('relu')(conv)
    dropout = Dropout(0.5)(relu)
    return dropout


def apply_deconvolution(nn, n_output_filters):
    """Upsample `nn` with a strided transposed convolution.

    Layout: MyBatchNormalization -> 5x5 Conv2DTranspose with stride 2
    -> ReLU -> Dropout(0.5).

    Args:
        nn: input Keras tensor.
        n_output_filters: number of filters of the transposed convolution.

    Returns:
        The output Keras tensor of the block.
    """
    normalized = MyBatchNormalization()(nn, training=None)
    transposed_conv = Conv2DTranspose(
        n_output_filters,
        kernel_size=(5, 5),
        strides=(2, 2),
        padding='same',
        output_padding=1,
    )
    upsampled = transposed_conv(normalized)
    activated = Activation('relu')(upsampled)
    return Dropout(0.5)(activated)


def apply_nonbt_1d(nn, n_output_filters, k=None):
    """Apply two factorized 3x3 convolutions (3x1 then 1x3) with a batch norm in between.

    Layout: conv 3x1 -> conv 1x3 -> MyBatchNormalization -> conv 3x1 -> conv 1x3.
    No activation is applied inside the block.

    Args:
        nn: input Keras tensor.
        n_output_filters: number of filters of each of the four convolutions.
        k: optional stage index used to build explicit layer names
            ('nonbt{k}_conv1' ... 'nonbt{k}_conv4', 'bn_nonbth_{k}'). When it
            is omitted, Keras assigns automatic names, so the block can be
            instantiated by callers that do not track a stage index.

    Returns:
        The output Keras tensor of the last convolution.
    """
    def layer_name(template):
        # None lets Keras pick a unique automatic name.
        return None if k is None else template.format(k)

    conv1 = Conv2D(n_output_filters,
                   kernel_size=(3, 1),
                   padding='same',
                   name=layer_name('nonbt{}_conv1')
                  )(nn)
    conv2 = Conv2D(n_output_filters,
                   kernel_size=(1, 3),
                   padding='same',
                   name=layer_name('nonbt{}_conv2')
                  )(conv1)
    # The layer name belongs to the constructor; keyword arguments given when
    # the layer is *called* are forwarded to its call() method instead.
    # NOTE(review): assumes MyBatchNormalization accepts `name` like a regular
    # Keras layer - confirm against its definition.
    bn1 = MyBatchNormalization(name=layer_name('bn_nonbth_{}'))(conv2, training=None)
    conv3 = Conv2D(n_output_filters,
                   kernel_size=(3, 1),
                   padding='same',
                   name=layer_name('nonbt{}_conv3')
                  )(bn1)
    conv4 = Conv2D(n_output_filters,
                   kernel_size=(1, 3),
                   padding='same',
                   name=layer_name('nonbt{}_conv4')
                  )(conv3)
    return conv4


def apply_upconv_nonbt(nn, n_output_filters, k=None):
    """Upsample `nn` by 2x and refine it with a factorized-convolution block.

    Layout: BatchNorm -> UpSampling2D -> apply_nonbt_1d -> ReLU.

    Args:
        nn: input Keras tensor.
        n_output_filters: number of filters passed to `apply_nonbt_1d`.
        k: optional stage index, forwarded to `apply_nonbt_1d` where it is
            used to name the layers. `create_fcrn_model` passes it as the
            third positional argument.

    Returns:
        The output Keras tensor of the block.
    """
    batchnorm = BatchNormalization()(nn)
    upsampling = UpSampling2D()(batchnorm)
    nonbt = apply_nonbt_1d(upsampling, n_output_filters, k)
    relu = Activation('relu')(nonbt)
    return relu
    

def create_fcrn_model(encoder, deconv_type='projection', h=224, w=224, use_nonbt_blocks=False):
    """Build a depth-prediction model: `encoder` plus a 5-stage upsampling decoder with shortcuts.

    The encoder output goes through a 1x1 convolution with 1024 filters and
    then through five upsampling stages. Before stages 1-4 the output of an
    encoder layer ('add_13', 'add_7', 'add_3', 'my_batch_normalization_1', in
    that order) is concatenated to the decoder tensor along the channel axis.
    A final 3x3 convolution with one filter, a ReLU and a reshape to (h, w)
    produce the output.

    Args:
        encoder: Keras model containing layers named
            'my_batch_normalization_1', 'add_3', 'add_7' and 'add_13'.
            All of its layers are made trainable and it is compiled in place.
        deconv_type: which upsampling block to use in every stage; one of
            'projection', 'convolution', 'deconvolution', 'conv_nonbt'.
        h: output height used by the final Reshape.
        w: output width used by the final Reshape.
        use_nonbt_blocks: with deconv_type='deconvolution', additionally apply
            `apply_nonbt_1d` after stages 1-3.

    Returns:
        A Keras Model mapping `encoder.input` to a tensor reshaped to (h, w)
        per sample, with all layers trainable.

    Raises:
        ValueError: if `deconv_type` is not one of the supported values or a
            required shortcut layer is missing from `encoder`.
    """
    supported_types = ('projection', 'convolution', 'deconvolution', 'conv_nonbt')
    if deconv_type not in supported_types:
        # Without this check an unknown type would silently skip every
        # upsampling stage and fail later with an unrelated shape error.
        raise ValueError('unknown deconv_type {!r}; expected one of {}'.format(deconv_type, supported_types))

    shortcut_layer_names = ('my_batch_normalization_1', 'add_3', 'add_7', 'add_13')
    shortcut_outputs = {}
    for layer in encoder.layers:
        layer.trainable = True
        if layer.name in shortcut_layer_names:
            shortcut_outputs[layer.name] = layer.output
    missing = [name for name in shortcut_layer_names if name not in shortcut_outputs]
    if missing:
        raise ValueError('encoder has no layer(s) named {}'.format(missing))
    conv1 = shortcut_outputs['my_batch_normalization_1']
    add3 = shortcut_outputs['add_3']
    add7 = shortcut_outputs['add_7']
    add13 = shortcut_outputs['add_13']
    print(add3.shape, add7.shape, add13.shape)
    encoder.compile(loss='mean_squared_error', optimizer='adam')

    resnet_output = encoder.output
    nn = Conv2D(1024, kernel_size=1, padding='same', name='starting_conv')(resnet_output)
    # Encoder tensor concatenated in front of the decoder tensor before stage k.
    shortcut_before_stage = {1: add13, 2: add7, 3: add3, 4: conv1}
    for k in range(5):
        if k in shortcut_before_stage:
            nn = concatenate([shortcut_before_stage[k], nn], axis=-1)
        # The number of output filters is a fixed fraction of the (possibly
        # concatenated) input channels: 1/2 for stage 0, 1/3 for stages 1 and
        # 4, 1/4 for stages 2 and 3.
        n_channels = int(nn.shape[-1])
        if k == 0:
            n_filters = n_channels // 2
        elif k == 1 or k == 4:
            n_filters = n_channels // 3
        else:
            n_filters = n_channels // 4
        if deconv_type == 'projection':
            nn = apply_upprojection(nn, n_filters)
        elif deconv_type == 'convolution':
            nn = apply_upconvolution(nn, n_filters)
        elif deconv_type == 'deconvolution':
            nn = apply_deconvolution(nn, n_filters)
            if use_nonbt_blocks and 0 < k < 4:
                # The stage index is required by apply_nonbt_1d and keeps the
                # explicit layer names unique across stages.
                nn = apply_nonbt_1d(nn, n_filters, k)
        else:  # 'conv_nonbt'
            nn = apply_upconv_nonbt(nn, n_filters, k)
    depth_output = Conv2D(1, kernel_size=3, padding='same', name='final_conv')(nn)
    depth_output = Activation('relu', name='final_relu')(depth_output)
    # Drop the single-channel axis so that the output is (batch, h, w).
    depth_output = Reshape((h, w))(depth_output)
    fcrn_model = Model(inputs=encoder.input, outputs=depth_output)
    for layer in fcrn_model.layers:
        layer.trainable = True
    return fcrn_model

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from keras.models import load_model
# Load the saved encoder. The custom batch-normalization class has to be
# registered so that Keras can deserialize layers of that type.
encoder_custom_objects = {'MyBatchNormalization': MyBatchNormalization}
encoder = load_model('models/resnet50/resnet_encoder_coco.hdf5', custom_objects=encoder_custom_objects)
encoder.summary()

In [None]:
# Build the model, retrying a bounded number of times.
# An unbounded `while True` with a bare `except` never terminates when the
# failure is deterministic and also swallows KeyboardInterrupt; here every
# failure is reported and the last one is re-raised.
max_build_attempts = 5
for build_attempt in range(1, max_build_attempts + 1):
    try:
        model = create_fcrn_model(encoder, deconv_type='conv_nonbt', h=384, w=512)
        model.summary()
        break
    except Exception as build_error:
        print('model creation attempt {}/{} failed: {!r}'.format(
            build_attempt, max_build_attempts, build_error))
        if build_attempt == max_build_attempts:
            raise

## Обучение на малом датасете с адаптивным лоссом с использованием Tensorflow

In [None]:
from experiment_utils.training import train_model
from experiment_utils.callbacks import LoggingCallback
from keras.optimizers import Adam

In [None]:
from keras.utils import multi_gpu_model

In [None]:
# Wrap the model for execution on several GPUs; the GPU count is hard-coded to 4.
model_gpu = multi_gpu_model(model, gpus=4)

In [None]:
# Adaptive loss, built as a TF1 graph on top of the Keras model output.
# k is a threshold on the label values: pixels whose label is below k use the
# berHu loss, the remaining pixels use the squared error. k itself is moved by
# a fixed step (delta * delta_lr) on every run of `change_k`.
k = tf.Variable(1.0)
# Half-width of the bands around k in which errors are compared.
delta = tf.constant(0.1)
# Step size for k, as a fraction of delta.
delta_lr = tf.constant(1e-2)
# Ground-truth labels, one 384x512 map per sample.
labels_plh = tf.placeholder(tf.float32, shape=(None, 384, 512))
predictions = model_gpu.output

# define k adjusting
# Per-pixel absolute error.
error = tf.abs(labels_plh - predictions)
# 0/1 masks of the pixels whose label lies in (k, k + delta) and (k - delta, k).
pixels_above_k = tf.cast(tf.logical_and(labels_plh > k, labels_plh < k + delta), tf.float32)
pixels_below_k = tf.cast(tf.logical_and(labels_plh < k, labels_plh > k - delta), tf.float32)
# Mean absolute error inside each band.
# NOTE(review): if no label falls into a band the denominator is 0 and the
# result is NaN; the comparison below is then False, so k is decreased.
# Confirm that this is the intended fallback.
error_above_k = tf.reduce_sum(pixels_above_k * error) / tf.reduce_sum(pixels_above_k)
error_below_k = tf.reduce_sum(pixels_below_k * error) / tf.reduce_sum(pixels_below_k)
# Raise k when the band just above it has the larger mean error, lower it otherwise.
dk = tf.where(error_above_k > error_below_k,
              delta * delta_lr,
              -delta * delta_lr)
# Update op for k; k is never allowed to drop below delta.
change_k = k.assign(tf.maximum(k + dk, delta))

# define loss: berhu on pixels < k, mse on pixels > k
# berHu threshold: 20% of the largest error in the batch, capped at 0.5.
berhu_threshold = tf.minimum(0.2 * tf.reduce_max(error), 0.5)
# berHu: absolute error below the threshold, scaled quadratic above it.
berhu_loss = tf.where(error < berhu_threshold,
                      error, 
                      (error ** 2 + berhu_threshold ** 2) / (2 * berhu_threshold))
square_loss = error ** 2
loss = tf.where(labels_plh < k,
                berhu_loss,
                square_loss)
# Scalar training objective.
loss_mean = tf.reduce_mean(loss)
# Plain mean squared error over all pixels, tracked for monitoring only.
mse = tf.reduce_mean(square_loss)

# define optimizer
optimizer = tf.train.AdamOptimizer(1e-4)
train_step = optimizer.minimize(loss_mean)

In [None]:
# Initialize only the variables created in the loss/optimizer cell (the Adam
# state and k) instead of running a global initializer.
# NOTE(review): presumably this is done so that the loaded encoder weights are
# not overwritten - confirm. Also confirm that the decoder weights created by
# create_fcrn_model are initialized elsewhere; they are not initialized here.
adam_initializers = [var.initializer for var in optimizer.variables()]
session.run(adam_initializers)
session.run(k.initializer)

In [None]:
from IPython.display import clear_output

In [None]:
def running_mean(x, n):
    """Return the mean of every length-`n` sliding window of `x`.

    Args:
        x: 1-D sequence of numbers.
        n: window size, a positive integer.

    Returns:
        A float array with `len(x) - n + 1` elements, where element `j` is
        the mean of `x[j:j + n]`; the array is empty when `n > len(x)`.

    Raises:
        ValueError: if `n` is not positive.
    """
    if n <= 0:
        raise ValueError('window size n must be positive, got {}'.format(n))
    # Prepend a zero so that the difference of cumulative sums also covers the
    # first window x[0:n]; without it that window is silently dropped.
    cumsum = np.concatenate(([0.0], np.cumsum(x)))
    return (cumsum[n:] - cumsum[:-n]) / n

In [None]:
# `plt` is used for the live plots below but is not imported in any earlier cell.
import matplotlib.pyplot as plt

n_epochs = 4
batch_size = 16
verbose_freq = 10      # report every `verbose_freq` batches
smoothing_window = 100  # window of the running mean used in the plots
loss_history = []
mse_history = []
k_history = []

# Dropout and batch normalization only run in training mode when the Keras
# learning phase is 1. When the phase is a tensor it has to be fed explicitly,
# because session.run bypasses the Keras training loop; when it was fixed with
# K.set_learning_phase it is a plain int and nothing needs to be fed.
learning_phase = K.learning_phase()
learning_phase_feed = {} if isinstance(learning_phase, int) else {learning_phase: 1}

for epoch in range(n_epochs):
    # New random order of the training samples for every epoch.
    ids = np.arange(len(rgbs_train))
    np.random.shuffle(ids)
    for batch_idx, i in enumerate(range(0, len(rgbs_train), batch_size)):
        # Slicing past the end is safe, so the last batch is simply shorter.
        batch_ids = ids[i:i + batch_size]
        rgbs_batch = rgbs_train[batch_ids]
        depths_batch = depths_train[batch_ids]
        feed_dict = {model_gpu.input: rgbs_batch,
                     labels_plh: depths_batch}
        feed_dict.update(learning_phase_feed)
        # Take k from the output of the assign op: fetching the variable
        # itself in the same run may return the value from before or after
        # the update.
        k_value, loss_value, mse_value, _ = session.run([change_k, loss_mean, mse, train_step],
                                                        feed_dict=feed_dict)
        loss_history.append(loss_value)
        mse_history.append(mse_value)
        k_history.append(k_value)
        # Count batches, not sample offsets: `i` advances by batch_size.
        if batch_idx % verbose_freq == 0:
            clear_output()
            # print loss and k
            print('average loss over last {} batches: {}'.format(verbose_freq, np.mean(loss_history[-verbose_freq:])))
            print('average MSE over last {} batches: {}'.format(verbose_freq, np.mean(mse_history[-verbose_freq:])))
            print('k: {}'.format(k_history[-1]))

            # The smoothed curves are empty until the history is longer than the window.
            if len(loss_history) > smoothing_window:
                # plot loss and mse
                plt.figure(figsize=(16, 16))
                plt.subplot(2, 1, 1)
                plt.plot(running_mean(loss_history, smoothing_window), label='loss')
                plt.plot(running_mean(mse_history, smoothing_window), label='MSE')
                plt.legend(fontsize=16)
                plt.grid(ls=':')

                # plot k in the second row (a 2x2 index of 1 would overlap the first axes)
                plt.subplot(2, 1, 2)
                plt.plot(running_mean(k_history, smoothing_window), label='k')
                plt.legend(fontsize=16)
                plt.grid(ls=':')
                # Render the figure now instead of at the end of the cell.
                plt.show()